/* qemu/cpus.c */
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1 MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
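
/* The read loop above is the usual seqlock pattern: if a writer updated
 * qemu_icount_bias or icount_time_shift while we were computing the value,
 * seqlock_read_retry() fails and the read is redone.  Readers never block,
 * which is what allows cpu_get_icount() to be called outside the BQL.
 */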

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
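
/* For example, with icount_time_shift == 3 each completed instruction
 * advances QEMU_CLOCK_VIRTUAL by 2^3 = 8 ns, a nominal guest speed of
 * 125 MIPS; at MAX_ICOUNT_SHIFT (10) it is 1024 ns per instruction,
 * roughly the 1 MIPS floor noted above.
 */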

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing actually protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing actually protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
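
/* Together, cpu_enable_ticks() and cpu_disable_ticks() make the offsets
 * accumulate only the intervals during which the VM was actually running,
 * so cpu_get_ticks() and cpu_get_clock() measure guest run time rather
 * than wall-clock time since QEMU started.
 */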

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
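
/* A worked example of the adjustment: if virtual time has run, say, 200 ms
 * ahead of real time (delta > 0) and keeps gaining on it, icount_time_shift
 * is decremented, halving the nanoseconds charged per instruction.
 * Recomputing qemu_icount_bias as cur_icount - (qemu_icount << shift)
 * keeps cpu_get_icount_locked() continuous across the shift change.
 */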

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
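
/* qemu_icount_round() converts a nanosecond deadline into a whole number
 * of instructions, rounding up: with icount_time_shift == 3, a 20 ns
 * deadline becomes (20 + 7) >> 3 = 3 instructions, a budget that covers
 * the full deadline while overshooting it by less than one instruction.
 */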

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not send network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
}
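
/* Worked example of the throttling arithmetic: at a 50% throttle,
 * pct == 0.5 and throttle_ratio == 1, so each tick schedules a 10 ms
 * sleep (one CPU_THROTTLE_TIMESLICE_NS) and re-arms the timer 20 ms out;
 * the vCPU therefore sleeps for half of every 20 ms period.  As pct
 * approaches CPU_THROTTLE_PCT_MAX (99%), this grows to a 990 ms sleep
 * out of every 1 s period.
 */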

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125 MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}
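
/* To summarize the option handling above: "-icount shift=N" fixes the
 * cost of one instruction at 2^N ns (use_icount == 1), while
 * "-icount shift=auto" starts from shift 3 and lets icount_adjust()
 * retune the shift at run time (use_icount == 2).  With sleep=off,
 * qemu_start_warp_timer() instead jumps QEMU_CLOCK_VIRTUAL straight to
 * the next deadline rather than waiting in real time.
 */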

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
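
/* The budget is split in two because the TCG fast path decrements a 16-bit
 * counter: up to 0xffff instructions go into icount_decr.u16.low and the
 * remainder is parked in icount_extra.  A budget of 100000 instructions,
 * for instance, becomes low = 65535 and extra = 34465; the execution loop
 * refills low from extra when it runs out.
 */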

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* Start with a pending exit request so the first iteration of the
     * loop below services I/O events before executing guest code.
     */
    atomic_mb_set(&exit_request, 1);

    cpu = first_cpu;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        if (!cpu) {
            cpu = first_cpu;
        }

        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;
                r = tcg_cpu_exec(cpu);
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                }
            } else if (cpu->stop || cpu->stopped) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

        } /* for cpu.. */

        /* Pairs with smp_wmb in qemu_cpu_kick.  */
        atomic_mb_set(&exit_request, 0);

        handle_icount_deadline();

        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
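
/* The contended path above is a simple handshake: if a TCG vCPU thread
 * holds the BQL, qemu_cpu_kick_no_halt() raises exit_request so that
 * thread drops out of its execution loop and releases the mutex promptly.
 * The qemu_io_proceeded_cond broadcast then wakes any vCPU blocked in
 * qemu_tcg_wait_io_event() waiting for iothread_requesting_mutex to drop
 * back to zero.
 */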

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* Size of temporary buffers used when forming a vCPU thread name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped;
   the current state is forgotten forever.  */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}