qemu/accel/tcg/cpu-exec.c
/*
 *  emulator main execution loop
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/qemu-print.h"
#include "hw/core/tcg-cpu-ops.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/compiler.h"
#include "qemu/timer.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h"
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/replay.h"
#include "exec/helper-proto.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* -icount align implementation. */

typedef struct SyncClocks {
    int64_t diff_clk;
    int64_t last_cpu_icount;
    int64_t realtime_clock;
} SyncClocks;

#if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
#define THRESHOLD_REDUCE 1.5
#define MAX_DELAY_PRINT_RATE 2000000000LL
#define MAX_NB_PRINTS 100

static int64_t max_delay;
static int64_t max_advance;

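/*
 * With -icount align, sleep the host until real time has caught up
 * whenever the guest's instruction-count clock has run more than
 * VM_CLOCK_ADVANCE ahead of real time.
 */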
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t cpu_icount;

    if (!icount_align_option) {
        return;
    }

    cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
    sc->last_cpu_icount = cpu_icount;

    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
#ifndef _WIN32
        struct timespec sleep_delay, rem_delay;
        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
#else
        Sleep(sc->diff_clk / SCALE_MS);
        sc->diff_clk = 0;
#endif
    }
}

static void print_delay(const SyncClocks *sc)
{
    static float threshold_delay;
    static int64_t last_realtime_clock;
    static int nb_prints;

    if (icount_align_option &&
        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
        nb_prints < MAX_NB_PRINTS) {
        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
            (-sc->diff_clk / (float)1000000000LL <
             (threshold_delay - THRESHOLD_REDUCE))) {
            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
            qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
                        threshold_delay - 1,
                        threshold_delay);
            nb_prints++;
            last_realtime_clock = sc->realtime_clock;
        }
    }
}

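/*
 * Record the initial offset between the virtual clock and real time,
 * and track the worst-case delay/advance reported by dump_drift_info().
 */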
static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    if (!icount_align_option) {
        return;
    }
    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
    sc->last_cpu_icount
        = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    if (sc->diff_clk < max_delay) {
        max_delay = sc->diff_clk;
    }
    if (sc->diff_clk > max_advance) {
        max_advance = sc->diff_clk;
    }

    /* Print every 2s max if the guest is late. We limit the number
       of printed messages to MAX_NB_PRINTS (currently 100). */
    print_delay(sc);
}
#else
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}

static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
#endif /* CONFIG_USER_ONLY */

uint32_t curr_cflags(CPUState *cpu)
{
    uint32_t cflags = cpu->tcg_cflags;

    /*
     * Record gdb single-step.  We should be exiting the TB by raising
     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
     *
     * For singlestep and -d nochain, suppress goto_tb so that
     * we can log -d cpu,exec after every TB.
     */
    if (unlikely(cpu->singlestep_enabled)) {
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
    } else if (singlestep) {
        cflags |= CF_NO_GOTO_TB | 1;
    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
        cflags |= CF_NO_GOTO_TB;
    }

    return cflags;
}

/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                          target_ulong cs_base,
                                          uint32_t flags, uint32_t cflags)
{
    TranslationBlock *tb;
    uint32_t hash;

    /* we should never be trying to look up an INVALID tb */
    tcg_debug_assert(!(cflags & CF_INVALID));

    hash = tb_jmp_cache_hash_func(pc);
    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);

    if (likely(tb &&
               tb->pc == pc &&
               tb->cs_base == cs_base &&
               tb->flags == flags &&
               tb->trace_vcpu_dstate == *cpu->trace_dstate &&
               tb_cflags(tb) == cflags)) {
        return tb;
    }
    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return NULL;
    }
    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
    return tb;
}

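/*
 * Emit -d exec/cpu logging for the TB that is about to be executed,
 * honouring any -dfilter address ranges.
 */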
static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
                                const TranslationBlock *tb)
{
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
        && qemu_log_in_addr_range(pc)) {

        qemu_log_mask(CPU_LOG_EXEC,
                      "Trace %d: %p [" TARGET_FMT_lx
                      "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
                      cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                      tb->flags, tb->cflags, lookup_symbol(pc));

#if defined(DEBUG_DISAS)
        if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            FILE *logfile = qemu_log_lock();
            int flags = 0;

            if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
                flags |= CPU_DUMP_FPU;
            }
#if defined(TARGET_I386)
            flags |= CPU_DUMP_CCOP;
#endif
            log_cpu_state(cpu, flags);
            qemu_log_unlock(logfile);
        }
#endif /* DEBUG_DISAS */
    }
}

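/*
 * Return true, with EXCP_DEBUG pending, when a breakpoint fires exactly
 * at @pc.  Otherwise, if another breakpoint lives in the same page,
 * adjust *cflags so that we single-step through that page.
 */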
static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
                                  uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool match_page = false;

    if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
        return false;
    }

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        /*
         * If we have an exact pc match, trigger the breakpoint.
         * Otherwise, note matches within the page.
         */
        if (pc == bp->pc) {
            bool match_bp = false;

            if (bp->flags & BP_GDB) {
                match_bp = true;
            } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
                g_assert_not_reached();
#else
                CPUClass *cc = CPU_GET_CLASS(cpu);
                assert(cc->tcg_ops->debug_check_breakpoint);
                match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
#endif
            }

            if (match_bp) {
                cpu->exception_index = EXCP_DEBUG;
                return true;
            }
        } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
            match_page = true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (match_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
    }
    return false;
}

/**
 * helper_lookup_tb_ptr: quick check for next tb
 * @env: current cpu state
 *
 * Look for an existing TB matching the current cpu state.
 * If found, return the code pointer.  If not found, return
 * the tcg epilogue so that we return into cpu_tb_exec.
 */
const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    cflags = curr_cflags(cpu);
    if (check_for_breakpoints(cpu, pc, &cflags)) {
        cpu_loop_exit(cpu);
    }

    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return tcg_code_gen_epilogue;
    }

    log_cpu_exec(pc, cpu, tb);

    return tb->tc.ptr;
}

/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call. Since such a function did not exist at compile
 * time, the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU, so this does not
 * affect the impact of CFI in environments with high security requirements.
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    CPUArchState *env = cpu->env_ptr;
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    log_cpu_exec(itb->pc, cpu, itb);

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(env, tb_ptr);
    cpu->can_do_io = 1;
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (e.g. because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = CPU_GET_CLASS(cpu);
        qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
                               "Stopped execution of TB chain before %p ["
                               TARGET_FMT_lx "] %s\n",
                               last_tb->tc.ptr, last_tb->pc,
                               lookup_symbol(last_tb->pc));
        if (cc->tcg_ops->synchronize_from_tb) {
            cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
    }
    return last_tb;
}

static void cpu_exec_enter(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_enter) {
        cc->tcg_ops->cpu_exec_enter(cpu);
    }
}

static void cpu_exec_exit(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_exit) {
        cc->tcg_ops->cpu_exec_exit(cpu);
    }
}

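/*
 * Execute exactly one guest instruction inside the exclusive region.
 * This is the slow path taken when an instruction containing an atomic
 * operation cannot be emulated while other vCPUs run in parallel.
 */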
void cpu_exec_step_atomic(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

        cflags = curr_cflags(cpu);
        /* Execute in a serial context. */
        cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
        /*
         * The mmap_lock is dropped by tb_gen_code if it runs out of
         * memory.
         */
#ifndef CONFIG_SOFTMMU
        tcg_debug_assert(!have_mmap_lock());
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        assert_no_pages_locked();
        qemu_plugin_disable_mem_helpers(cpu);
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjmp out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}

struct tb_desc {
    target_ulong pc;
    target_ulong cs_base;
    CPUArchState *env;
    tb_page_addr_t phys_page1;
    uint32_t flags;
    uint32_t cflags;
    uint32_t trace_vcpu_dstate;
};

static bool tb_lookup_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if (tb->pc == desc->pc &&
        tb->page_addr[0] == desc->phys_page1 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
        tb_cflags(tb) == desc->cflags) {
        /* check next page if needed */
        if (tb->page_addr[1] == -1) {
            return true;
        } else {
            tb_page_addr_t phys_page2;
            target_ulong virt_page2;

            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
            phys_page2 = get_page_addr_code(desc->env, virt_page2);
            if (tb->page_addr[1] == phys_page2) {
                return true;
            }
        }
    }
    return false;
}

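/*
 * Slow-path TB lookup: search the global QHT hash table, keyed on the
 * physical and virtual PC, flags and cflags, for a matching TB.
 */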
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                   target_ulong cs_base, uint32_t flags,
                                   uint32_t cflags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = (CPUArchState *)cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.cflags = cflags;
    desc.trace_vcpu_dstate = *cpu->trace_dstate;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    if (phys_pc == -1) {
        return NULL;
    }
    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    if (TCG_TARGET_HAS_direct_jump) {
        uintptr_t offset = tb->jmp_target_arg[n];
        uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
        uintptr_t jmp_rx = tc_ptr + offset;
        uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
        tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr);
    } else {
        tb->jmp_target_arg[n] = addr;
    }
}

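/*
 * Chain exit @n of @tb directly to @tb_next, provided the destination
 * is still valid and the jump slot has not already been claimed by
 * another thread.
 */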
static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                           "Linking TBs %p [" TARGET_FMT_lx
                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
                           tb->tc.ptr, tb->pc, n,
                           tb_next->tc.ptr, tb_next->pc);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
    return;
}

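/*
 * Return true if the CPU is halted and has no work pending, in which
 * case cpu_exec() returns EXCP_HALTED without entering the main loop.
 */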
static inline bool cpu_handle_halt(CPUState *cpu)
{
    if (cpu->halted) {
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
            apic_poll_irq(x86_cpu->apic_state);
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
#endif
        if (!cpu_has_work(cpu)) {
            return true;
        }

        cpu->halted = 0;
    }

    return false;
}

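/*
 * On EXCP_DEBUG, clear stale watchpoint-hit flags and give the target's
 * debug exception handler a chance to run, if it provides one.
 */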
static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (cc->tcg_ops->debug_excp_handler) {
        cc->tcg_ops->debug_excp_handler(cpu);
    }
}

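/*
 * Dispatch any pending exception.  Return true when cpu_exec() should
 * leave the execution loop with *ret set; return false to continue
 * executing TBs.
 */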
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
        }
#endif
        return false;
    }
    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    } else {
#if defined(CONFIG_USER_ONLY)
        /* In user-mode emulation, we simulate a fake exception which
           will be handled outside the cpu execution loop */
#if defined(TARGET_I386)
        CPUClass *cc = CPU_GET_CLASS(cpu);
        cc->tcg_ops->do_interrupt(cpu);
#endif
        *ret = cpu->exception_index;
        cpu->exception_index = -1;
        return true;
#else
        if (replay_exception()) {
            CPUClass *cc = CPU_GET_CLASS(cpu);
            qemu_mutex_lock_iothread();
            cc->tcg_ops->do_interrupt(cpu);
            qemu_mutex_unlock_iothread();
            cpu->exception_index = -1;

            if (unlikely(cpu->singlestep_enabled)) {
                /*
                 * After processing the exception, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                *ret = EXCP_DEBUG;
                cpu_handle_debug_exception(cpu);
                return true;
            }
        } else if (!replay_has_interrupt()) {
            /* give a chance to iothread in replay mode */
            *ret = EXCP_INTERRUPT;
            return true;
        }
#endif
    }

    return false;
}

/*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later. It does not need to be recorded for
 * replay purposes.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return !(interrupt_request & CPU_INTERRUPT_POLL);
#else
    return true;
#endif
}

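/*
 * Process pending interrupt and exit requests.  Return true when the
 * caller must break out of the TB execution loop and re-check for
 * exceptions; return false to keep executing TBs.
 */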
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit())
     */
    qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        qemu_mutex_lock_iothread();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            qemu_mutex_unlock_iothread();
            return true;
        }
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if defined(TARGET_I386)
        else if (interrupt_request & CPU_INTERRUPT_INIT) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUArchState *env = &x86_cpu->env;
            replay_interrupt();
            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
            do_cpu_init(x86_cpu);
            cpu->exception_index = EXCP_HALTED;
            qemu_mutex_unlock_iothread();
            return true;
        }
#else
        else if (interrupt_request & CPU_INTERRUPT_RESET) {
            replay_interrupt();
            cpu_reset(cpu);
            qemu_mutex_unlock_iothread();
            return true;
        }
#endif
        /* The target hook has 3 exit conditions:
           False when the interrupt isn't processed,
           True when it is, and we should restart on a new TB,
           and via longjmp through cpu_loop_exit.  */
        else {
            if (cc->tcg_ops->cpu_exec_interrupt &&
                cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                cpu->exception_index =
                    (cpu->singlestep_enabled ? EXCP_DEBUG : -1);
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        qemu_mutex_unlock_iothread();
    }

    /* Finally, check if we need to exit to the main loop.  */
    if (unlikely(qatomic_read(&cpu->exit_request))
        || (icount_enabled()
            && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
        qatomic_set(&cpu->exit_request, 0);
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}

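/*
 * Execute @tb and, if the TB exited early because the icount budget
 * ran out, refill the decrementer or arrange for the next TB to contain
 * exactly the number of instructions we still have left to execute.
 */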
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    TranslationBlock **last_tb, int *tb_exit)
{
    int32_t insns_left;

    trace_exec_tb(tb, tb->pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    insns_left = qatomic_read(&cpu_neg(cpu)->icount_decr.u32);
    if (insns_left < 0) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (e.g. exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt.  cpu_handle_interrupt will also
         * clear cpu->icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired.  */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution.  */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount)  {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}

/* main execution loop */

int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    rcu_read_lock();

    cpu_exec_enter(cpu);

    /* Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0. As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    /* prepare setjmp context for exception handling */
    if (sigsetjmp(cpu->jmp_env, 0) != 0) {
#if defined(__clang__)
        /*
         * Some compilers wrongly smash all local variables after
         * siglongjmp (the spec requires that only non-volatile locals
         * which are changed between the sigsetjmp and siglongjmp are
         * permitted to be trashed). There were bug reports for gcc
         * 4.5.0 and clang.  The bug is fixed in all versions of gcc
         * that we support, but is still unfixed in clang:
         *   https://bugs.llvm.org/show_bug.cgi?id=21183
         *
         * Reload an essential local variable here for those compilers.
         * Newer versions of gcc would complain about this code (-Wclobbered),
         * so we only perform the workaround for clang.
         */
        cpu = current_cpu;
#else
        /* Non-buggy compilers preserve this; assert the correct value. */
        g_assert(cpu == current_cpu);
#endif

#ifndef CONFIG_SOFTMMU
        tcg_debug_assert(!have_mmap_lock());
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        qemu_plugin_disable_mem_helpers(cpu);

        assert_no_pages_locked();
    }

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            target_ulong cs_base, pc;
            uint32_t flags, cflags;

            cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            cflags = cpu->cflags_next_tb;
            if (cflags == -1) {
                cflags = curr_cflags(cpu);
            } else {
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, pc, &cflags)) {
                break;
            }

            tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
            if (tb == NULL) {
                mmap_lock();
                tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                mmap_unlock();
                /*
                 * Add the TB to the virtual-pc hash table
                 * for fast lookup.
                 */
                qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb->page_addr[1] != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);

            /* Try to align the host and virtual clocks
               if the guest is ahead */
            align_clocks(&sc, cpu);
        }
    }

    cpu_exec_exit(cpu);
    rcu_read_unlock();

    return ret;
}

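/*
 * Per-vCPU TCG setup: one-time target initialization, TLB and plugin
 * hooks, and (in system mode) the IOMMU notifier list.
 */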
void tcg_exec_realizefn(CPUState *cpu, Error **errp)
{
    static bool tcg_target_initialized;
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!tcg_target_initialized) {
        cc->tcg_ops->initialize();
        tcg_target_initialized = true;
    }
    tlb_init(cpu);
    qemu_plugin_vcpu_init_hook(cpu);

#ifndef CONFIG_USER_ONLY
    tcg_iommu_init_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */
}

/* undo the initializations in reverse order */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    qemu_plugin_vcpu_exit_hook(cpu);
    tlb_destroy(cpu);
}

#ifndef CONFIG_USER_ONLY

void dump_drift_info(void)
{
    if (!icount_enabled()) {
        return;
    }

    qemu_printf("Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - icount_get()) / SCALE_MS);
    if (icount_align_option) {
        qemu_printf("Max guest delay     %"PRIi64" ms\n",
                    -max_delay / SCALE_MS);
        qemu_printf("Max guest advance   %"PRIi64" ms\n",
                    max_advance / SCALE_MS);
    } else {
        qemu_printf("Max guest delay     NA\n");
        qemu_printf("Max guest advance   NA\n");
    }
}

#endif /* !CONFIG_USER_ONLY */