qemu/cpus.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

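/* Lock-free reader for the fields protected by vm_clock_seqlock.  Roughly,
 * the seqlock read pattern used here (and by cpu_get_clock() below) retries
 * the read whenever it raced with a writer, so readers never block:
 *
 *     do {
 *         start = seqlock_read_begin(&sl);
 *         ... read the protected fields ...
 *     } while (seqlock_read_retry(&sl, start));
 */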
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

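/* For example, with icount_time_shift == 3 each instruction accounts for
 * 1 << 3 = 8 ns of virtual time, i.e. the guest appears to execute at
 * 125 MIPS (the initial guess used by configure_icount() below).
 */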
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* return the offset between the host clock and virtual CPU clock */
int64_t cpu_get_clock_offset(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = timers_state.cpu_clock_offset;
        if (!timers_state.cpu_ticks_enabled) {
            ti -= get_clock();
        }
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return -ti;
}

/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing actually protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing actually protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   get_ticks_per_sec() / 10);
    icount_adjust();
}

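/* Convert a nanosecond deadline into an instruction budget, rounding up:
 * ceil(count / 2^icount_time_shift).  For example, a 1000 ns deadline with
 * icount_time_shift == 3 yields (1000 + 7) >> 3 = 125 instructions.
 */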
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time (related to the time left until the next
         * event) has passed.  The QEMU_CLOCK_REALTIME timer will do this.
         * This keeps the warps from being visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        }
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

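/* The icount fields above travel as a subsection: they are only put on the
 * wire when icount_state_needed() returns true, i.e. when the guest runs
 * with -icount, so migration between configurations that do not use icount
 * is unaffected.
 */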
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &icount_vmstate_timers,
            .needed = icount_state_needed,
        }, {
            /* empty */
        }
    }
};

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
}

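/* Illustrative command lines that end up here:
 *
 *     qemu-system-x86_64 -icount shift=3,align=on   # fixed 8 ns per insn
 *     qemu-system-x86_64 -icount shift=auto         # adaptive, use_icount == 2
 *
 * "align" requires a fixed shift; shift=auto with align=on is rejected below.
 */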
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                          icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                        icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_clean_all_dirty(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_clean_state(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

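/* Roughly: the vCPU thread keeps SIG_IPI and SIGBUS blocked in user space,
 * while the mask installed with kvm_set_signal_mask() below is applied
 * atomically only for the duration of KVM_RUN.  A kick delivered while the
 * thread is in user space therefore stays pending and pops the thread out
 * of the guest on the next KVM_RUN.
 */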
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

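/* Run func on cpu's thread and wait for completion.  The work item lives on
 * this thread's stack, so blocking on qemu_work_cond until wi.done is set is
 * what keeps it valid; contrast async_run_on_cpu() below, which
 * heap-allocates the item (wi->free = true) and returns immediately, leaving
 * flush_queued_work() to free it.
 */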
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
       /* Start accounting real time to the virtual clock if the CPUs
          are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

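/* With TCG, the vCPU thread may hold the global mutex for a long stretch of
 * translated code, so a plain qemu_mutex_lock() could stall the iothread
 * indefinitely.  Instead, try the lock and, on contention, kick the TCG
 * thread out of the guest; it will then block on qemu_io_proceeded_cond in
 * qemu_tcg_wait_io_event() until the iothread is done.
 */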
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

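/* Threading model: all TCG vCPUs share one round-robin thread, created the
 * first time this runs (later CPUs simply reuse tcg_cpu_thread), whereas the
 * KVM and dummy backends below create one thread per vCPU.
 */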
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

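/* In icount mode the instruction budget for one execution slice is split
 * between the 16-bit decrementer and icount_extra.  For example, a deadline
 * worth count == 100000 instructions yields decr == 0xffff (65535) and
 * icount_extra == 34465: the per-TB decrementer expires after 65535 insns
 * and the remainder is refilled from icount_extra.
 */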
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

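/* Illustrative QMP usage (argument names follow the memsave schema; "val"
 * is the starting virtual address and "cpu-index" is optional):
 *
 *     { "execute": "memsave",
 *       "arguments": { "val": 1048576, "size": 4096,
 *                      "filename": "/tmp/vm.mem", "cpu-index": 0 } }
 */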
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 " specified", addr);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}
1580