qemu/accel/tcg/cpu-exec.c
/*
 *  emulator main execution loop
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/qemu-print.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/type-helpers.h"
#include "hw/core/tcg-cpu-ops.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/compiler.h"
#include "qemu/timer.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h"
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/replay.h"
#include "sysemu/tcg.h"
#include "exec/helper-proto.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* -icount align implementation. */

typedef struct SyncClocks {
    int64_t diff_clk;
    int64_t last_cpu_icount;
    int64_t realtime_clock;
} SyncClocks;

#if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
#define THRESHOLD_REDUCE 1.5
#define MAX_DELAY_PRINT_RATE 2000000000LL
#define MAX_NB_PRINTS 100

static int64_t max_delay;
static int64_t max_advance;

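/*
 * When -icount align is enabled, sleep the host whenever the guest's
 * virtual clock has run more than VM_CLOCK_ADVANCE ahead of real time,
 * so that the two clocks stay roughly in step.
 */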
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t cpu_icount;

    if (!icount_align_option) {
        return;
    }

    cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
    sc->last_cpu_icount = cpu_icount;

    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
#ifndef _WIN32
        struct timespec sleep_delay, rem_delay;
        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
#else
        Sleep(sc->diff_clk / SCALE_MS);
        sc->diff_clk = 0;
#endif
    }
}

static void print_delay(const SyncClocks *sc)
{
    static float threshold_delay;
    static int64_t last_realtime_clock;
    static int nb_prints;

    if (icount_align_option &&
        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
        nb_prints < MAX_NB_PRINTS) {
        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
            (-sc->diff_clk / (float)1000000000LL <
             (threshold_delay - THRESHOLD_REDUCE))) {
            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
            qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
                        threshold_delay - 1,
                        threshold_delay);
            nb_prints++;
            last_realtime_clock = sc->realtime_clock;
        }
    }
}

static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    if (!icount_align_option) {
        return;
    }
    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
    sc->last_cpu_icount
        = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    if (sc->diff_clk < max_delay) {
        max_delay = sc->diff_clk;
    }
    if (sc->diff_clk > max_advance) {
        max_advance = sc->diff_clk;
    }

    /* Print every 2s max if the guest is late. We limit the number
       of printed messages to MAX_NB_PRINTS (currently 100). */
    print_delay(sc);
}
#else
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}

static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
#endif /* CONFIG_USER_ONLY */

uint32_t curr_cflags(CPUState *cpu)
{
    uint32_t cflags = cpu->tcg_cflags;

    /*
     * Record gdb single-step.  We should be exiting the TB by raising
     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
     *
     * For singlestep and -d nochain, suppress goto_tb so that
     * we can log -d cpu,exec after every TB.
     */
    if (unlikely(cpu->singlestep_enabled)) {
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
    } else if (singlestep) {
        cflags |= CF_NO_GOTO_TB | 1;
    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
        cflags |= CF_NO_GOTO_TB;
    }

    return cflags;
}

/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                          target_ulong cs_base,
                                          uint32_t flags, uint32_t cflags)
{
    TranslationBlock *tb;
    uint32_t hash;

    /* we should never be trying to look up an INVALID tb */
    tcg_debug_assert(!(cflags & CF_INVALID));

    hash = tb_jmp_cache_hash_func(pc);
    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);

    if (likely(tb &&
               tb->pc == pc &&
               tb->cs_base == cs_base &&
               tb->flags == flags &&
               tb->trace_vcpu_dstate == *cpu->trace_dstate &&
               tb_cflags(tb) == cflags)) {
        return tb;
    }
    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return NULL;
    }
    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
    return tb;
}

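/*
 * Emit the "-d exec" trace line (and, when "-d cpu" is also enabled, a
 * full register dump) just before a TB is entered, honouring any
 * -dfilter address ranges.
 */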
static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
                                const TranslationBlock *tb)
{
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
        && qemu_log_in_addr_range(pc)) {

        qemu_log_mask(CPU_LOG_EXEC,
                      "Trace %d: %p [" TARGET_FMT_lx
                      "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
                      cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                      tb->flags, tb->cflags, lookup_symbol(pc));

#if defined(DEBUG_DISAS)
        if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            FILE *logfile = qemu_log_lock();
            int flags = 0;

            if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
                flags |= CPU_DUMP_FPU;
            }
#if defined(TARGET_I386)
            flags |= CPU_DUMP_CCOP;
#endif
            log_cpu_state(cpu, flags);
            qemu_log_unlock(logfile);
        }
#endif /* DEBUG_DISAS */
    }
}

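/*
 * Check whether a breakpoint covers the PC we are about to execute from.
 * Returns true (with EXCP_DEBUG pending) when an exact match must trap
 * before the insn runs.  When only the surrounding page matches, force
 * single-insn TBs without goto_tb so that each insn returns through
 * helper_lookup_tb_ptr, where the exact breakpoint will eventually hit.
 */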
static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
                                  uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool match_page = false;

    if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
        return false;
    }

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        /*
         * If we have an exact pc match, trigger the breakpoint.
         * Otherwise, note matches within the page.
         */
        if (pc == bp->pc) {
            bool match_bp = false;

            if (bp->flags & BP_GDB) {
                match_bp = true;
            } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
                g_assert_not_reached();
#else
                CPUClass *cc = CPU_GET_CLASS(cpu);
                assert(cc->tcg_ops->debug_check_breakpoint);
                match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
#endif
            }

            if (match_bp) {
                cpu->exception_index = EXCP_DEBUG;
                return true;
            }
        } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
            match_page = true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (match_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
    }
    return false;
}

/**
 * helper_lookup_tb_ptr: quick check for next tb
 * @env: current cpu state
 *
 * Look for an existing TB matching the current cpu state.
 * If found, return the code pointer.  If not found, return
 * the tcg epilogue so that we return into cpu_tb_exec.
 */
const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    cflags = curr_cflags(cpu);
    if (check_for_breakpoints(cpu, pc, &cflags)) {
        cpu_loop_exit(cpu);
    }

    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return tcg_code_gen_epilogue;
    }

    log_cpu_exec(pc, cpu, tb);

    return tb->tc.ptr;
}

/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call. Since such a function did not exist at compile
 * time, the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU, so this does not
 * affect the impact of CFI in environments with high security requirements.
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    CPUArchState *env = cpu->env_ptr;
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    log_cpu_exec(itb->pc, cpu, itb);

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(env, tb_ptr);
    cpu->can_do_io = 1;
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = CPU_GET_CLASS(cpu);
        qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
                               "Stopped execution of TB chain before %p ["
                               TARGET_FMT_lx "] %s\n",
                               last_tb->tc.ptr, last_tb->pc,
                               lookup_symbol(last_tb->pc));
        if (cc->tcg_ops->synchronize_from_tb) {
            cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
    }

    /*
     * If gdb single-step, and we haven't raised another exception,
     * raise a debug exception.  Single-step with another exception
     * is handled in cpu_handle_exception.
     */
    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
        cpu->exception_index = EXCP_DEBUG;
        cpu_loop_exit(cpu);
    }

    return last_tb;
}


static void cpu_exec_enter(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_enter) {
        cc->tcg_ops->cpu_exec_enter(cpu);
    }
}

static void cpu_exec_exit(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_exit) {
        cc->tcg_ops->cpu_exec_exit(cpu);
    }
}

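/*
 * Execute exactly one guest instruction inside the exclusive region.
 * This is the fallback used when a translated atomic operation cannot be
 * performed in parallel: all other vCPUs are stopped, the insn runs in a
 * serial-context TB (CF_PARALLEL cleared), and the exclusive region is
 * released again even if we longjmp out of codegen or execution.
 */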
void cpu_exec_step_atomic(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

        cflags = curr_cflags(cpu);
        /* Execute in a serial context. */
        cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
        /*
         * The mmap_lock is dropped by tb_gen_code if it runs out of
         * memory.
         */
#ifndef CONFIG_SOFTMMU
        clear_helper_retaddr();
        tcg_debug_assert(!have_mmap_lock());
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        assert_no_pages_locked();
        qemu_plugin_disable_mem_helpers(cpu);
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}

struct tb_desc {
    target_ulong pc;
    target_ulong cs_base;
    CPUArchState *env;
    tb_page_addr_t phys_page1;
    uint32_t flags;
    uint32_t cflags;
    uint32_t trace_vcpu_dstate;
};

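/*
 * qht comparison callback: match a candidate TB against the lookup
 * descriptor, including the physical address of the second page for
 * TBs that span two guest pages.
 */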
static bool tb_lookup_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if (tb->pc == desc->pc &&
        tb->page_addr[0] == desc->phys_page1 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
        tb_cflags(tb) == desc->cflags) {
        /* check next page if needed */
        if (tb->page_addr[1] == -1) {
            return true;
        } else {
            tb_page_addr_t phys_page2;
            target_ulong virt_page2;

            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
            phys_page2 = get_page_addr_code(desc->env, virt_page2);
            if (tb->page_addr[1] == phys_page2) {
                return true;
            }
        }
    }
    return false;
}

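/*
 * Slow-path TB lookup in the global hash table, keyed on the physical
 * address of the guest code.  Returns NULL if no TB matches or if the
 * guest PC is not currently backed by executable memory.
 */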
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                   target_ulong cs_base, uint32_t flags,
                                   uint32_t cflags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = (CPUArchState *)cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.cflags = cflags;
    desc.trace_vcpu_dstate = *cpu->trace_dstate;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    if (phys_pc == -1) {
        return NULL;
    }
    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

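/*
 * Point exit slot @n of @tb at @addr: either patch the generated code in
 * place (direct-jump hosts, via the read-write alias of the split-wx
 * region) or store the target for an indirect jump.
 */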
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    if (TCG_TARGET_HAS_direct_jump) {
        uintptr_t offset = tb->jmp_target_arg[n];
        uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
        uintptr_t jmp_rx = tc_ptr + offset;
        uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
        tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr);
    } else {
        tb->jmp_target_arg[n] = addr;
    }
}

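/*
 * Chain exit @n of @tb to @tb_next.  The destination slot is claimed with
 * a cmpxchg so that only one thread patches the jump, the link is recorded
 * in tb_next's jump list for later invalidation, and nothing is done if
 * tb_next has already been invalidated.
 */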
static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                           "Linking TBs %p [" TARGET_FMT_lx
                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
                           tb->tc.ptr, tb->pc, n,
                           tb_next->tc.ptr, tb_next->pc);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
    return;
}

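/*
 * Returns true if the vCPU should stay halted, in which case cpu_exec
 * bails out with EXCP_HALTED.  On x86 system emulation a pending APIC
 * poll request is serviced first, since it may deliver the interrupt
 * that wakes the CPU up.
 */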
static inline bool cpu_handle_halt(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
#if defined(TARGET_I386)
        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
            apic_poll_irq(x86_cpu->apic_state);
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
#endif /* TARGET_I386 */
        if (!cpu_has_work(cpu)) {
            return true;
        }

        cpu->halted = 0;
    }
#endif /* !CONFIG_USER_ONLY */

    return false;
}

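/*
 * On EXCP_DEBUG, clear any stale watchpoint-hit flags and give the
 * target a chance to run its own debug exception hook.
 */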
static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (cc->tcg_ops->debug_excp_handler) {
        cc->tcg_ops->debug_excp_handler(cpu);
    }
}

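/*
 * Handle a pending cpu->exception_index.  Returns true when cpu_exec
 * should leave the execution loop with *ret set; returns false when
 * execution can continue (no exception pending, or the exception was
 * delivered to the guest via the target's do_interrupt hook).
 */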
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
        }
#endif
        return false;
    }
    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    } else {
#if defined(CONFIG_USER_ONLY)
        /* if user mode only, we simulate a fake exception
           which will be handled outside the cpu execution
           loop */
#if defined(TARGET_I386)
        CPUClass *cc = CPU_GET_CLASS(cpu);
        cc->tcg_ops->fake_user_interrupt(cpu);
#endif /* TARGET_I386 */
        *ret = cpu->exception_index;
        cpu->exception_index = -1;
        return true;
#else
        if (replay_exception()) {
            CPUClass *cc = CPU_GET_CLASS(cpu);
            qemu_mutex_lock_iothread();
            cc->tcg_ops->do_interrupt(cpu);
            qemu_mutex_unlock_iothread();
            cpu->exception_index = -1;

            if (unlikely(cpu->singlestep_enabled)) {
                /*
                 * After processing the exception, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                *ret = EXCP_DEBUG;
                cpu_handle_debug_exception(cpu);
                return true;
            }
        } else if (!replay_has_interrupt()) {
            /* give a chance to iothread in replay mode */
            *ret = EXCP_INTERRUPT;
            return true;
        }
#endif
    }

    return false;
}

#ifndef CONFIG_USER_ONLY
/*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later. It does not need to be recorded for
 * replay purposes.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return !(interrupt_request & CPU_INTERRUPT_POLL);
#else
    return true;
#endif
}
#endif /* !CONFIG_USER_ONLY */

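/*
 * Service pending interrupt and exit requests between TBs.  Returns true
 * when the inner execution loop must be left (debug event, halt, exit
 * request, or exhausted icount budget); returns false to continue,
 * possibly after clearing *last_tb so that no stale chaining is done
 * across an interrupt that changed the control flow.
 */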
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here. Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit())
     */
    qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        qemu_mutex_lock_iothread();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if defined(TARGET_I386)
        else if (interrupt_request & CPU_INTERRUPT_INIT) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUArchState *env = &x86_cpu->env;
            replay_interrupt();
            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
            do_cpu_init(x86_cpu);
            cpu->exception_index = EXCP_HALTED;
            qemu_mutex_unlock_iothread();
            return true;
        }
#else
        else if (interrupt_request & CPU_INTERRUPT_RESET) {
            replay_interrupt();
            cpu_reset(cpu);
            qemu_mutex_unlock_iothread();
            return true;
        }
#endif /* !TARGET_I386 */
        /* The target hook has 3 exit conditions:
           False when the interrupt isn't processed,
           True when it is, and we should restart on a new TB,
           and via longjmp through cpu_loop_exit.  */
        else {
            CPUClass *cc = CPU_GET_CLASS(cpu);

            if (cc->tcg_ops->cpu_exec_interrupt &&
                cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                cpu->exception_index =
                    (cpu->singlestep_enabled ? EXCP_DEBUG : -1);
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        qemu_mutex_unlock_iothread();
    }

    /* Finally, check if we need to exit to the main loop.  */
    if (unlikely(qatomic_read(&cpu->exit_request))
        || (icount_enabled()
            && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
        qatomic_set(&cpu->exit_request, 0);
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}

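/*
 * Execute one TB (and anything chained onto it).  On TB_EXIT_REQUESTED
 * the caller's last_tb is cleared so that cpu_handle_interrupt deals with
 * the pending request; when the icount budget has simply run out, the
 * decrementer is refilled and, if needed, the next TB is constrained to
 * exactly the remaining number of instructions.
 */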
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    TranslationBlock **last_tb, int *tb_exit)
{
    int32_t insns_left;

    trace_exec_tb(tb, tb->pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    insns_left = qatomic_read(&cpu_neg(cpu)->icount_decr.u32);
    if (insns_left < 0) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt.  cpu_handle_interrupt will also
         * clear cpu->icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired.  */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution.  */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount)  {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}

/* main execution loop */

int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    rcu_read_lock();

    cpu_exec_enter(cpu);

    /* Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0. As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    /* prepare setjmp context for exception handling */
    if (sigsetjmp(cpu->jmp_env, 0) != 0) {
#if defined(__clang__)
        /*
         * Some compilers wrongly smash all local variables after
         * siglongjmp (the spec requires that only non-volatile locals
         * which are changed between the sigsetjmp and siglongjmp are
         * permitted to be trashed). There were bug reports for gcc
         * 4.5.0 and clang.  The bug is fixed in all versions of gcc
         * that we support, but is still unfixed in clang:
         *   https://bugs.llvm.org/show_bug.cgi?id=21183
         *
         * Reload an essential local variable here for those compilers.
         * Newer versions of gcc would complain about this code (-Wclobbered),
         * so we only perform the workaround for clang.
         */
        cpu = current_cpu;
#else
        /* Non-buggy compilers preserve this; assert the correct value. */
        g_assert(cpu == current_cpu);
#endif

#ifndef CONFIG_SOFTMMU
        clear_helper_retaddr();
        tcg_debug_assert(!have_mmap_lock());
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        qemu_plugin_disable_mem_helpers(cpu);

        assert_no_pages_locked();
    }

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            target_ulong cs_base, pc;
            uint32_t flags, cflags;

            cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            cflags = cpu->cflags_next_tb;
            if (cflags == -1) {
                cflags = curr_cflags(cpu);
            } else {
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, pc, &cflags)) {
                break;
            }

            tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
            if (tb == NULL) {
                mmap_lock();
                tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                mmap_unlock();
                /*
                 * Add the TB to the virtual-PC hash table
                 * for fast lookup.
                 */
                qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb->page_addr[1] != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);

            /* Try to align the host and virtual clocks
               if the guest is ahead */
            align_clocks(&sc, cpu);
        }
    }

    cpu_exec_exit(cpu);
    rcu_read_unlock();

    return ret;
}

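/*
 * Per-vCPU TCG setup, called when the CPU is realized: run the target's
 * one-time translator initialization, create the softmmu TLB, notify the
 * plugin subsystem of the new vCPU and (for system emulation) set up the
 * IOMMU notifier list.
 */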
void tcg_exec_realizefn(CPUState *cpu, Error **errp)
{
    static bool tcg_target_initialized;
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!tcg_target_initialized) {
        cc->tcg_ops->initialize();
        tcg_target_initialized = true;
    }
    tlb_init(cpu);
    qemu_plugin_vcpu_init_hook(cpu);

#ifndef CONFIG_USER_ONLY
    tcg_iommu_init_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */
}

/* undo the initializations in reverse order */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    qemu_plugin_vcpu_exit_hook(cpu);
    tlb_destroy(cpu);
}

#ifndef CONFIG_USER_ONLY

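/*
 * Append icount drift statistics (host vs. guest clock, plus the worst
 * observed delay/advance when -icount align is in use) to @buf.
 */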
void dump_drift_info(GString *buf)
{
    if (!icount_enabled()) {
        return;
    }

    g_string_append_printf(buf, "Host - Guest clock  %"PRIi64" ms\n",
                           (cpu_get_clock() - icount_get()) / SCALE_MS);
    if (icount_align_option) {
        g_string_append_printf(buf, "Max guest delay     %"PRIi64" ms\n",
                               -max_delay / SCALE_MS);
        g_string_append_printf(buf, "Max guest advance   %"PRIi64" ms\n",
                               max_advance / SCALE_MS);
    } else {
        g_string_append_printf(buf, "Max guest delay     NA\n");
        g_string_append_printf(buf, "Max guest advance   NA\n");
    }
}

HumanReadableText *qmp_x_query_jit(Error **errp)
{
    g_autoptr(GString) buf = g_string_new("");

    if (!tcg_enabled()) {
        error_setg(errp, "JIT information is only available with accel=tcg");
        return NULL;
    }

    dump_exec_info(buf);
    dump_drift_info(buf);

    return human_readable_text_from_str(buf);
}

HumanReadableText *qmp_x_query_opcount(Error **errp)
{
    g_autoptr(GString) buf = g_string_new("");

    if (!tcg_enabled()) {
        error_setg(errp, "Opcode count information is only available with accel=tcg");
        return NULL;
    }

    dump_opcount_info(buf);

    return human_readable_text_from_str(buf);
}

#endif /* !CONFIG_USER_ONLY */