/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
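/* cpu_icount_to_ns() charges 2^icount_time_shift ns of virtual time per
 * instruction, so a shift of 10 means 1024 ns per instruction, i.e.
 * roughly the 1 MIPS floor mentioned above.
 */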

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

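/* Lock-free reader side of the vm_clock_seqlock: re-read the protected
 * fields whenever a writer (who holds the BQL) bumped the sequence
 * count while we were reading.
 */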
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

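/* For example, with the initial icount_time_shift of 3 set in
 * configure_icount() below, each instruction accounts for 2^3 = 8 ns
 * of virtual time, i.e. a nominal guest speed of 125 MIPS.
 */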
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
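
/* If the host counter does jump backwards (e.g. across a software
 * suspend), the clamp above folds the difference into cpu_ticks_offset
 * so that callers always observe monotonic tick values.
 */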

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
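
/* Each step halves or doubles the ns-per-instruction rate: decrementing
 * icount_time_shift slows virtual time down (the guest ran ahead of real
 * time), incrementing it speeds virtual time up (the guest fell behind),
 * clamped to the range [0, MAX_ICOUNT_SHIFT].
 */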

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
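
/* For example, with icount_time_shift == 3 a 1001 ns deadline rounds up
 * to (1001 + 7) >> 3 = 126 instructions (1008 ns), so the instruction
 * budget never undershoots the deadline.
 */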

static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = cpu_get_clock_locked();
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    if (icount_sleep) {
        /*
         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
         * This ensures that the deadline for the timer is computed correctly
         * below.
         * This also makes sure that the insn counter is synchronized before
         * the CPU starts running, in case the CPU is woken by an event other
         * than the earliest QEMU_CLOCK_VIRTUAL timer.
         */
        icount_warp_rt(NULL);
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for
             * example, we will not send network packets continuously instead
             * of every 100 ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_warp_rt, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=no are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=no are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
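
/* These options arrive from the -icount command line option, e.g.
 *
 *     qemu-system-xxx -icount shift=auto,align=off,sleep=on
 *
 * where shift=auto selects the adaptive use_icount == 2 mode configured
 * above (the exact invocation is illustrative).
 */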

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_clean_all_dirty(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_clean_state(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
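
/* Illustrative use of run_on_cpu() (set_halted is a hypothetical helper,
 * not part of this file):
 *
 *     static void set_halted(void *data)
 *     {
 *         CPUState *cpu = data;
 *         cpu->halted = 1;
 *     }
 *
 *     run_on_cpu(cpu, set_halted, cpu);
 *
 * The caller blocks on qemu_work_cond until the target vCPU thread has
 * run the function, which is why the work item can live on the stack.
 */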

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
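
/* Unlike run_on_cpu(), the async variant returns immediately, so the
 * work item is heap-allocated (wi->free = true) and freed by
 * flush_queued_work() on the target vCPU thread once it has run.
 */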

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    exit_request = 1;

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        /* Retry until the context can actually be retrieved. */
        while (GetThreadContext(cpu->hThread, &tcgContext) == 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
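
/* A typical pattern for code that may run either with or without the
 * BQL held (a sketch; qemu_mutex_lock_iothread() must not be called
 * recursively):
 *
 *     if (!qemu_mutex_iothread_locked()) {
 *         qemu_mutex_lock_iothread();
 *         ... touch device, timer or memory-map state ...
 *         qemu_mutex_unlock_iothread();
 *     }
 */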

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* Size of temporary buffers used to form vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
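
/* All TCG vCPUs share the single thread created above; tcg_exec_all()
 * below round-robins execution between them via next_cpu.
 */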

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped;
   the current state is forgotten forever.  */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
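
/* Example of the budget split above: a 74565-instruction slice (0x12345)
 * stores decr = 0xffff in icount_decr.u16.low and the remaining 0x2346
 * instructions in icount_extra; cpu_get_icount_raw() subtracts both to
 * see how much of the budget has not executed yet.
 */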

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still lack it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}