qemu/cpus.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor.h"
#include "sysemu.h"
#include "gdbstub.h"
#include "dma.h"
#include "kvm.h"
#include "qmp-commands.h"

#include "qemu-thread.h"
#include "cpus.h"
#include "qtest.h"
#include "main-loop.h"

#ifndef _WIN32
#include "compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUArchState *next_cpu;

/***********************************************************/
/* guest cycle counter */

/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
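/* Illustrative note: with icount_time_shift == MAX_ICOUNT_SHIFT, one
 * emulated instruction accounts for 2^10 = 1024 ns of virtual time,
 * i.e. roughly 10^9 / 1024 ~= 0.98 million instructions per virtual
 * second, which is where the "1MIPS minimum" above comes from.
 */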
/* Compensate for varying guest execution speed.  */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;

typedef struct TimersState {
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

TimersState timers_state;

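/* A sketch of the icount model used below: virtual time in ns is
 *
 *     vm_clock = qemu_icount_bias + (instructions_executed << icount_time_shift)
 *
 * where qemu_icount counts instructions handed out to the translated
 * code and icount_time_shift scales instructions to nanoseconds.
 */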
/* Return the virtual CPU time, based on the instruction counter.  */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    CPUArchState *env = cpu_single_env;

    icount = qemu_icount;
    if (env) {
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}

/* return the host CPU cycle counter and handle stop/restart */
int64_t cpu_get_ticks(void)
{
    if (use_icount) {
        return cpu_get_icount();
    }
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    } else {
        int64_t ticks;
        ticks = cpu_get_real_ticks();
        if (timers_state.cpu_ticks_prev > ticks) {
            /* Note: non-increasing ticks may happen if the host uses
               software suspend.  */
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        }
        timers_state.cpu_ticks_prev = ticks;
        return ticks + timers_state.cpu_ticks_offset;
    }
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        return ti + timers_state.cpu_clock_offset;
    }
}

/* enable cpu_get_ticks() */
void cpu_enable_ticks(void)
{
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
}

/* Disable cpu_get_ticks(): the clock is stopped.  You must not call
   cpu_get_ticks() after that.  */
void cpu_disable_ticks(void)
{
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock();
        timers_state.cpu_ticks_enabled = 0;
    }
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variations.  When the guest is
   idle, real and virtual time will be aligned in the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

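/* Sketch of the adjustment below: each step halves or doubles the
 * instructions-to-ns scale by decrementing or incrementing
 * icount_time_shift, so the loop converges on a power-of-two
 * approximation of the real guest speed rather than an exact rate.
 */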
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;
    static int64_t last_delta;
    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }
    cur_time = cpu_get_clock();
    cur_icount = qemu_get_clock_ns(vm_clock);
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
}

static void icount_adjust_rt(void *opaque)
{
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
    icount_adjust();
}

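/* Example of the rounding below: with icount_time_shift == 3 (8 ns per
 * instruction), a deadline of 1000 ns rounds up to
 * (1000 + 7) >> 3 == 125 instructions, so the CPU never runs past the
 * next pending vm_clock timer.
 */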
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void *opaque)
{
    if (vm_clock_warp_start == -1) {
        return;
    }

    if (runstate_is_running()) {
        int64_t clock = qemu_get_clock_ns(rt_clock);
        int64_t warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 1) {
            qemu_icount_bias += warp_delta;
        } else {
            /*
             * In adaptive mode, do not let the vm_clock run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock();
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
            int64_t delta = cur_time - cur_icount;
            qemu_icount_bias += MIN(warp_delta, delta);
        }
        if (qemu_clock_expired(vm_clock)) {
            qemu_notify_event();
        }
    }
    vm_clock_warp_start = -1;
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_get_clock_ns(vm_clock);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline(vm_clock);
        int64_t warp = MIN(dest - clock, deadline);
        qemu_icount_bias += warp;
        qemu_run_timers(vm_clock);
        clock = qemu_get_clock_ns(vm_clock);
    }
    qemu_notify_event();
}

void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline > 0) {
        /*
         * Ensure the vm_clock proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * vm_clock.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending vm_clock timer; rather,
         * time could just advance to the next vm_clock event.  Instead, we
         * stop VCPUs and only advance vm_clock after some "real" time
         * (related to the time left until the next event) has passed.  The
         * rt_clock timer set here does exactly that.  This keeps the warps
         * from being too visible externally; for example, you will not be
         * sending network packets continuously instead of every 100ms.
         */
        qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
    } else {
        qemu_notify_event();
    }
}

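/* Note on the layout below: the "dummy" field and the versioned
 * cpu_clock_offset keep the migration stream layout stable; dummy
 * presumably occupies the slot of a value (the old ticks-per-second
 * rate) that earlier versions stored at this position.
 */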
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields      = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

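/* configure_icount() is driven by the -icount command-line option:
 * "-icount N" fixes icount_time_shift at N (2^N ns per instruction),
 * while "-icount auto" starts from a guess and lets the timers set up
 * below re-tune the shift as the guest runs.
 */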
void configure_icount(const char *option)
{
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUArchState *env;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
#ifdef TARGET_I386
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
#else
        cpu_dump_state(env, stderr, fprintf, 0);
#endif
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

int cpu_is_stopped(CPUArchState *env)
{
    return !runstate_is_running() || env->stopped;
}

static void do_vm_stop(RunState state)
{
    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        bdrv_drain_all();
        bdrv_flush_all();
        monitor_protocol_event(QEVENT_STOP, NULL);
    }
}

static int cpu_can_run(CPUArchState *env)
{
    if (env->stop) {
        return 0;
    }
    if (env->stopped || !runstate_is_running()) {
        return 0;
    }
    return 1;
}

static bool cpu_thread_is_idle(CPUArchState *env)
{
    if (env->stop || env->queued_work_first) {
        return false;
    }
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    if (!env->halted || qemu_cpu_has_work(env) || kvm_irqchip_in_kernel()) {
        return false;
    }
    return true;
}

bool all_cpu_threads_idle(void)
{
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!cpu_thread_is_idle(env)) {
            return false;
        }
    }
    return true;
}

static void cpu_handle_guest_debug(CPUArchState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
    env->stopped = 1;
}

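/* SIG_IPI handler for TCG mode: force the currently executing vCPU (if
 * any) out of the translated-code loop and ask the round-robin loop in
 * tcg_exec_all() to stop at the next opportunity.
 */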
static void cpu_signal(int sig)
{
    if (cpu_single_env) {
        cpu_exit(cpu_single_env);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

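/* Drain any SIG_IPI or SIGBUS left pending for this thread without
 * blocking: SIG_IPI is simply consumed (its only job is to interrupt
 * KVM_RUN), while a SIGBUS is forwarded to the KVM MCE handling or
 * re-raised fatally if it cannot be handled.
 */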
static void qemu_kvm_eat_signals(CPUArchState *env)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUArchState *env)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(env, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

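/* Run func(data) on env's vCPU thread.  The work item can live on the
 * caller's stack because the caller blocks on qemu_work_cond until the
 * vCPU thread has marked it done; cpu_single_env is saved and restored
 * around the wait since qemu_cond_wait drops the global mutex.
 */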
void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (!env->queued_work_first) {
        env->queued_work_first = &wi;
    } else {
        env->queued_work_last->next = &wi;
    }
    env->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(env);
    while (!wi.done) {
        CPUArchState *self_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = self_env;
    }
}

static void flush_queued_work(CPUArchState *env)
{
    struct qemu_work_item *wi;

    if (!env->queued_work_first) {
        return;
    }

    while ((wi = env->queued_work_first)) {
        env->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUArchState *env)
{
    if (env->stop) {
        env->stop = 0;
        env->stopped = 1;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(env);
    env->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUArchState *env;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(vm_clock);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        qemu_wait_io_event_common(env);
    }
}

static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    while (cpu_thread_is_idle(env)) {
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUArchState *env = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(env->thread);
    env->thread_id = qemu_get_thread_id();
    cpu_single_env = env;

    r = kvm_init_vcpu(env);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(env);

    /* signal CPU creation */
    env->created = 1;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(env)) {
            r = kvm_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
            }
        }
        qemu_kvm_wait_io_event(env);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUArchState *env = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(env->thread);
    env->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    env->created = 1;
    qemu_cond_signal(&qemu_cpu_cond);

    cpu_single_env = env;
    while (1) {
        cpu_single_env = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        cpu_single_env = env;
        qemu_wait_io_event_common(env);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUArchState *env = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(env->thread);

    /* signal CPU creation */
    qemu_mutex_lock(&qemu_global_mutex);
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->thread_id = qemu_get_thread_id();
        env->created = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
            qemu_wait_io_event_common(env);
        }
    }

    while (1) {
        tcg_exec_all();
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
            qemu_notify_event();
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUArchState *env)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(env->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(env->hThread);
        cpu_signal(0);
        ResumeThread(env->hThread);
    }
#endif
}

void qemu_cpu_kick(void *_env)
{
    CPUArchState *env = _env;

    qemu_cond_broadcast(env->halt_cond);
    if (!tcg_enabled() && !env->thread_kicked) {
        qemu_cpu_kick_thread(env);
        env->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(cpu_single_env);

    if (!cpu_single_env->thread_kicked) {
        qemu_cpu_kick_thread(cpu_single_env);
        cpu_single_env->thread_kicked = true;
    }
#else
    abort();
#endif
}

int qemu_cpu_is_self(void *_env)
{
    CPUArchState *env = _env;

    return qemu_thread_is_self(env->thread);
}

void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUArchState *penv = first_cpu;

    while (penv) {
        if (!penv->stopped) {
            return 0;
        }
        penv = penv->next_cpu;
    }

    return 1;
}

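/* Pause every vCPU and wait until all of them have stopped.  When this
 * is called from a vCPU thread rather than the iothread (e.g. via a
 * stop request raised from device emulation), the caller must stop its
 * own CPU explicitly, since it cannot wait on itself.
 */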
void pause_all_vcpus(void)
{
    CPUArchState *penv = first_cpu;

    qemu_clock_enable(vm_clock, false);
    while (penv) {
        penv->stop = 1;
        qemu_cpu_kick(penv);
        penv = penv->next_cpu;
    }

    if (!qemu_thread_is_self(&io_thread)) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* Reset penv: the loop above left it at NULL.  */
            penv = first_cpu;
            while (penv) {
                penv->stop = 0;
                penv->stopped = 1;
                penv = penv->next_cpu;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        penv = first_cpu;
        while (penv) {
            qemu_cpu_kick(penv);
            penv = penv->next_cpu;
        }
    }
}

void resume_all_vcpus(void)
{
    CPUArchState *penv = first_cpu;

    qemu_clock_enable(vm_clock, true);
    while (penv) {
        penv->stop = 0;
        penv->stopped = 0;
        qemu_cpu_kick(penv);
        penv = penv->next_cpu;
    }
}

static void qemu_tcg_init_vcpu(void *_env)
{
    CPUArchState *env = _env;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        env->thread = g_malloc0(sizeof(QemuThread));
        env->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(env->halt_cond);
        tcg_halt_cond = env->halt_cond;
        qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env,
                           QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        env->hThread = qemu_thread_get_handle(env->thread);
#endif
        while (env->created == 0) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = env->thread;
    } else {
        env->thread = tcg_cpu_thread;
        env->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    env->thread = g_malloc0(sizeof(QemuThread));
    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);
    qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (env->created == 0) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    env->thread = g_malloc0(sizeof(QemuThread));
    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);
    qemu_thread_create(env->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (env->created == 0) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(void *_env)
{
    CPUArchState *env = _env;

    env->nr_cores = smp_cores;
    env->nr_threads = smp_threads;
    env->stopped = 1;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(env);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(env);
    } else {
        qemu_dummy_start_vcpu(env);
    }
}

void cpu_stop_current(void)
{
    if (cpu_single_env) {
        cpu_single_env->stop = 0;
        cpu_single_env->stopped = 1;
        cpu_exit(cpu_single_env);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

void vm_stop(RunState state)
{
    if (!qemu_thread_is_self(&io_thread)) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return;
    }
    do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped; the
   current state is forgotten forever.  */
void vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        vm_stop(state);
    } else {
        runstate_set(state);
    }
}

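/* Execute one TCG time slice.  Under icount, the instruction budget up
 * to the next vm_clock deadline is split between the 16-bit
 * icount_decr.u16.low counter, which the generated code decrements, and
 * icount_extra for the remainder; e.g. a budget of 100000 instructions
 * becomes low = 0xffff (65535) and icount_extra = 34465.
 */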
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        count = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to the vm_clock.  */
    qemu_clock_warp(vm_clock);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
        CPUArchState *env = next_cpu;

        qemu_clock_enable(vm_clock,
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(env)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
                break;
            }
        } else if (env->stop || env->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void set_numa_modes(void)
{
    CPUArchState *env;
    int i;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
            if (node_cpumask[i] & (1 << env->cpu_index)) {
                env->numa_node = i;
            }
        }
    }
}

void set_cpu_log(const char *optarg)
{
    int mask;
    const CPULogItem *item;

    mask = cpu_str_to_log_mask(optarg);
    if (!mask) {
        printf("Log items (comma separated):\n");
        for (item = cpu_log_items; item->mask != 0; item++) {
            printf("%-10s %s\n", item->name, item->help);
        }
        exit(1);
    }
    cpu_set_log(mask);
}

void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list_id)
    cpu_list_id(f, cpu_fprintf, optarg);
#elif defined(cpu_list)
    cpu_list(f, cpu_fprintf); /* deprecated */
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CpuInfoList *info;

        cpu_synchronize_state(env);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = env->cpu_index;
        info->value->current = (env == first_cpu);
        info->value->halted = env->halted;
        info->value->thread_id = env->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    for (env = first_cpu; env; env = env->next_cpu) {
        if (cpu_index == env->cpu_index) {
            break;
        }
    }

    if (env == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_rw(addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!env->apic_state) {
            cpu_interrupt(env, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(env->apic_state);
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}