qemu/cpus.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
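
/*
 * Every lock-free reader of vm_clock_seqlock follows the pattern above:
 * retry the read whenever a writer raced with it.  A minimal sketch
 * (illustrative only, not part of this file's API):
 *
 *     unsigned start;
 *     int64_t val;
 *     do {
 *         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 *         val = cpu_get_clock_locked();
 *     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 */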

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
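
/*
 * Worked example: with icount_time_shift == 3 each emulated instruction
 * accounts for 1 << 3 = 8 ns of virtual time, i.e. the guest nominally
 * executes at 1e9 / 8 = 125 MIPS; shift 0 means 1000 MIPS, and
 * MAX_ICOUNT_SHIFT (10) is roughly the 1 MIPS floor mentioned above.
 */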

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, what the seqlock really protects is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, what the seqlock really protects is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
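
/*
 * The bias update above keeps cpu_get_icount_locked() continuous across a
 * shift change: that function returns
 *     qemu_icount_bias + (instructions executed << icount_time_shift)
 * (ignoring any partially executed translation block), so recomputing the
 * bias as cur_icount - (qemu_icount << new_shift) leaves the reported
 * virtual time unchanged at the instant of the change; only its future
 * rate differs.
 */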

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
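
/*
 * qemu_icount_round() divides rounding up: e.g. with icount_time_shift == 3
 * a 20 ns deadline needs (20 + 7) >> 3 = 3 instructions, since two
 * instructions would account for only 16 ns and stop short of the deadline.
 */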

static void icount_warp_rt(void)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_dummy_timer(void *opaque)
{
    (void)opaque;
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) {
        return;
    }

    if (icount_sleep) {
        /*
         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
         * This ensures that the deadline for the timer is computed correctly
         * below.
         * This also makes sure that the insn counter is synchronized before
         * the CPU starts running, in case the CPU is woken by an event other
         * than the earliest QEMU_CLOCK_VIRTUAL timer.
         */
        icount_warp_rt();
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being externally visible; for
             * example, you will not send network packets continuously
             * instead of every 100 ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}
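
/*
 * Throttling arithmetic, by example: at pct == 0.5 the ratio is
 * 0.5 / (1 - 0.5) = 1, so a vCPU sleeps 10 ms for every 10 ms timeslice of
 * work; at pct == 0.75 the ratio is 3, i.e. 30 ms of sleep per timeslice,
 * leaving the vCPU running 25% of the time.
 */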

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}
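
/*
 * Typical use (e.g. by migration auto-converge code): start throttling,
 * then stop once the workload has converged:
 *
 *     cpu_throttle_set(75);      - vCPUs now run roughly 25% of the time
 *     ...
 *     cpu_throttle_stop();
 */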

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_dummy_timer, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   get_ticks_per_sec() / 10);
}
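
/*
 * Command-line mapping, for reference: "-icount shift=3" requests the fixed
 * 8 ns/insn rate, "-icount shift=auto" selects the adaptive mode configured
 * above (use_icount == 2 with the two adjustment timers), and "align=on"
 * (valid only with a fixed shift) enables the drift accounting reported by
 * dump_drift_info() at the end of this file.
 */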

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
    while (!atomic_mb_read(&wi.done)) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
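
/*
 * Example caller (illustrative; do_read_regs and args are made-up names):
 * to touch per-vCPU state safely from another thread, wrap the access in a
 * function and let the target vCPU run it:
 *
 *     static void do_read_regs(void *data)
 *     {
 *         struct args *a = data;    - executes on the target vCPU thread
 *         ...
 *     }
 *
 *     run_on_cpu(cpu, do_read_regs, &args);    - blocks until done
 *
 * async_run_on_cpu() below is the fire-and-forget variant: its work item is
 * heap-allocated (wi->free = true) and released by flush_queued_work().
 */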

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
        qemu_mutex_unlock(&cpu->work_mutex);
        wi->func(wi->data);
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
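
/*
 * Code that may block or sleep brackets the region with this pair so the
 * I/O thread can make progress meanwhile, as cpu_throttle_thread() does
 * above:
 *
 *     qemu_mutex_unlock_iothread();
 *     ... sleep or wait without holding the BQL ...
 *     qemu_mutex_lock_iothread();
 */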

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* Size of the temporary buffer used to form a vCPU thread name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
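
/*
 * Budget split above, by example: a limit of 70000 instructions does not
 * fit the 16-bit decrementer, so decr becomes 0xffff (65535) and
 * icount_extra holds the remaining 4465; cpu_exec() consumes u16.low first
 * and refills it from icount_extra until both are exhausted or an exit is
 * requested.
 */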

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick.  */
    atomic_mb_set(&exit_request, 0);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}