qemu/accel/tcg/cpu-exec.c
/*
 *  emulator main execution loop
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/qemu-print.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/type-helpers.h"
#include "hw/core/tcg-cpu-ops.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/compiler.h"
#include "qemu/timer.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h"
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/replay.h"
#include "sysemu/tcg.h"
#include "exec/helper-proto.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* -icount align implementation. */

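/*
 * Bookkeeping for the -icount align option: the cumulative difference
 * between the virtual (guest) clock and the host realtime clock, plus
 * the instruction count seen at the last alignment point.
 */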
typedef struct SyncClocks {
    int64_t diff_clk;
    int64_t last_cpu_icount;
    int64_t realtime_clock;
} SyncClocks;

#if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
#define THRESHOLD_REDUCE 1.5
#define MAX_DELAY_PRINT_RATE 2000000000LL
#define MAX_NB_PRINTS 100

static int64_t max_delay;
static int64_t max_advance;

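/*
 * Account for the instructions executed since the last call and, if the
 * guest has run ahead of real time by more than VM_CLOCK_ADVANCE, sleep
 * until the two clocks are back in sync.
 */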
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t cpu_icount;

    if (!icount_align_option) {
        return;
    }

    cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
    sc->last_cpu_icount = cpu_icount;

    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
#ifndef _WIN32
        struct timespec sleep_delay, rem_delay;
        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
#else
        Sleep(sc->diff_clk / SCALE_MS);
        sc->diff_clk = 0;
#endif
    }
}

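/*
 * Warn when the guest is running late relative to the host clock,
 * printing at most MAX_NB_PRINTS messages, rate-limited to one every
 * MAX_DELAY_PRINT_RATE nanoseconds.
 */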
static void print_delay(const SyncClocks *sc)
{
    static float threshold_delay;
    static int64_t last_realtime_clock;
    static int nb_prints;

    if (icount_align_option &&
        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
        nb_prints < MAX_NB_PRINTS) {
        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
            (-sc->diff_clk / (float)1000000000LL <
             (threshold_delay - THRESHOLD_REDUCE))) {
            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
            qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
                        threshold_delay - 1,
                        threshold_delay);
            nb_prints++;
            last_realtime_clock = sc->realtime_clock;
        }
    }
}

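/*
 * Record the initial guest/host clock offset and track the maximum
 * observed delay and advance, which dump_drift_info() reports later.
 */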
static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    if (!icount_align_option) {
        return;
    }
    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
    sc->last_cpu_icount
        = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    if (sc->diff_clk < max_delay) {
        max_delay = sc->diff_clk;
    }
    if (sc->diff_clk > max_advance) {
        max_advance = sc->diff_clk;
    }

    /* Print every 2s max if the guest is late. We limit the number
       of printed messages to MAX_NB_PRINTS (currently 100). */
    print_delay(sc);
}
#else
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}

static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
#endif /* CONFIG_USER_ONLY */

uint32_t curr_cflags(CPUState *cpu)
{
    uint32_t cflags = cpu->tcg_cflags;

    /*
     * Record gdb single-step.  We should be exiting the TB by raising
     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
     *
     * For singlestep and -d nochain, suppress goto_tb so that
     * we can log -d cpu,exec after every TB.
     */
    if (unlikely(cpu->singlestep_enabled)) {
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
    } else if (singlestep) {
        cflags |= CF_NO_GOTO_TB | 1;
    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
        cflags |= CF_NO_GOTO_TB;
    }

    return cflags;
}

/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                          target_ulong cs_base,
                                          uint32_t flags, uint32_t cflags)
{
    TranslationBlock *tb;
    uint32_t hash;

    /* we should never be trying to look up an INVALID tb */
    tcg_debug_assert(!(cflags & CF_INVALID));

    hash = tb_jmp_cache_hash_func(pc);
    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);

    if (likely(tb &&
               tb->pc == pc &&
               tb->cs_base == cs_base &&
               tb->flags == flags &&
               tb->trace_vcpu_dstate == *cpu->trace_dstate &&
               tb_cflags(tb) == cflags)) {
        return tb;
    }
    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return NULL;
    }
    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
    return tb;
}

static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
                                const TranslationBlock *tb)
{
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
        && qemu_log_in_addr_range(pc)) {

        qemu_log_mask(CPU_LOG_EXEC,
                      "Trace %d: %p [" TARGET_FMT_lx
                      "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
                      cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                      tb->flags, tb->cflags, lookup_symbol(pc));

#if defined(DEBUG_DISAS)
        if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                int flags = 0;

                if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
                    flags |= CPU_DUMP_FPU;
                }
#if defined(TARGET_I386)
                flags |= CPU_DUMP_CCOP;
#endif
                cpu_dump_state(cpu, logfile, flags);
                qemu_log_unlock(logfile);
            }
        }
#endif /* DEBUG_DISAS */
    }
}

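/*
 * Return true if a breakpoint at @pc should raise EXCP_DEBUG now.
 * Otherwise, if any breakpoint merely shares the page with @pc, adjust
 * *cflags so the next TB executes a single instruction without chaining,
 * re-checking for the breakpoint after each insn.
 */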
static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
                                  uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool match_page = false;

    if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
        return false;
    }

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        /*
         * If we have an exact pc match, trigger the breakpoint.
         * Otherwise, note matches within the page.
         */
        if (pc == bp->pc) {
            bool match_bp = false;

            if (bp->flags & BP_GDB) {
                match_bp = true;
            } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
                g_assert_not_reached();
#else
                CPUClass *cc = CPU_GET_CLASS(cpu);
                assert(cc->tcg_ops->debug_check_breakpoint);
                match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
#endif
            }

            if (match_bp) {
                cpu->exception_index = EXCP_DEBUG;
                return true;
            }
        } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
            match_page = true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (match_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
    }
    return false;
}

/**
 * helper_lookup_tb_ptr: quick check for next tb
 * @env: current cpu state
 *
 * Look for an existing TB matching the current cpu state.
 * If found, return the code pointer.  If not found, return
 * the tcg epilogue so that we return into cpu_tb_exec.
 */
const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    cflags = curr_cflags(cpu);
    if (check_for_breakpoints(cpu, pc, &cflags)) {
        cpu_loop_exit(cpu);
    }

    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return tcg_code_gen_epilogue;
    }

    log_cpu_exec(pc, cpu, tb);

    return tb->tc.ptr;
}

/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call. Since such a function did not exist at compile
 * time, the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU, so this does not
 * affect the impact of CFI in environments with high security requirements.
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    CPUArchState *env = cpu->env_ptr;
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    log_cpu_exec(itb->pc, cpu, itb);

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(env, tb_ptr);
    cpu->can_do_io = 1;
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = CPU_GET_CLASS(cpu);
        qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
                               "Stopped execution of TB chain before %p ["
                               TARGET_FMT_lx "] %s\n",
                               last_tb->tc.ptr, last_tb->pc,
                               lookup_symbol(last_tb->pc));
        if (cc->tcg_ops->synchronize_from_tb) {
            cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
    }

    /*
     * If gdb single-step, and we haven't raised another exception,
     * raise a debug exception.  Single-step with another exception
     * is handled in cpu_handle_exception.
     */
    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
        cpu->exception_index = EXCP_DEBUG;
        cpu_loop_exit(cpu);
    }

    return last_tb;
}


static void cpu_exec_enter(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_enter) {
        cc->tcg_ops->cpu_exec_enter(cpu);
    }
}

static void cpu_exec_exit(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_exit) {
        cc->tcg_ops->cpu_exec_exit(cpu);
    }
}

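/*
 * Execute exactly one guest instruction in a serial (exclusive) context,
 * with CF_PARALLEL cleared, so that an atomic operation the backend cannot
 * implement in parallel mode can be emulated safely.
 */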
void cpu_exec_step_atomic(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

        cflags = curr_cflags(cpu);
        /* Execute in a serial context. */
        cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
        /*
         * The mmap_lock is dropped by tb_gen_code if it runs out of
         * memory.
         */
#ifndef CONFIG_SOFTMMU
        clear_helper_retaddr();
        tcg_debug_assert(!have_mmap_lock());
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        assert_no_pages_locked();
        qemu_plugin_disable_mem_helpers(cpu);
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}

struct tb_desc {
    target_ulong pc;
    target_ulong cs_base;
    CPUArchState *env;
    tb_page_addr_t phys_page1;
    uint32_t flags;
    uint32_t cflags;
    uint32_t trace_vcpu_dstate;
};

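/*
 * qht comparison callback: match a candidate TB against the lookup key in
 * struct tb_desc, including the second physical page for TBs that cross a
 * page boundary.
 */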
static bool tb_lookup_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if (tb->pc == desc->pc &&
        tb->page_addr[0] == desc->phys_page1 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
        tb_cflags(tb) == desc->cflags) {
        /* check next page if needed */
        if (tb->page_addr[1] == -1) {
            return true;
        } else {
            tb_page_addr_t phys_page2;
            target_ulong virt_page2;

            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
            phys_page2 = get_page_addr_code(desc->env, virt_page2);
            if (tb->page_addr[1] == phys_page2) {
                return true;
            }
        }
    }
    return false;
}

TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                   target_ulong cs_base, uint32_t flags,
                                   uint32_t cflags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.cflags = cflags;
    desc.trace_vcpu_dstate = *cpu->trace_dstate;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    if (phys_pc == -1) {
        return NULL;
    }
    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    if (TCG_TARGET_HAS_direct_jump) {
        uintptr_t offset = tb->jmp_target_arg[n];
        uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
        uintptr_t jmp_rx = tc_ptr + offset;
        uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
        tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr);
    } else {
        tb->jmp_target_arg[n] = addr;
    }
}

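/*
 * Chain exit slot @n of @tb directly to @tb_next, provided the destination
 * is still valid and the slot has not already been claimed by another
 * thread.
 */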
static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                           "Linking TBs %p [" TARGET_FMT_lx
                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
                           tb->tc.ptr, tb->pc, n,
                           tb_next->tc.ptr, tb_next->pc);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
    return;
}

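/*
 * Return true if the CPU is halted and has nothing to do; in system
 * emulation this also gives the x86 APIC a chance to deliver a pending
 * interrupt that would wake the CPU up.
 */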
static inline bool cpu_handle_halt(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
#if defined(TARGET_I386)
        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
            apic_poll_irq(x86_cpu->apic_state);
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
#endif /* TARGET_I386 */
        if (!cpu_has_work(cpu)) {
            return true;
        }

        cpu->halted = 0;
    }
#endif /* !CONFIG_USER_ONLY */

    return false;
}

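/*
 * Clear any stale watchpoint-hit flags and let the target's debug
 * exception hook run before EXCP_DEBUG is reported to the main loop.
 */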
static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (cc->tcg_ops->debug_excp_handler) {
        cc->tcg_ops->debug_excp_handler(cpu);
    }
}

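/*
 * Handle a pending exception, if any.  Returns true, with *ret set, when
 * the outer execution loop must exit; returns false to continue executing
 * TBs.
 */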
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
                | CF_NOIRQ | 1;
        }
#endif
        return false;
    }
    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    } else {
#if defined(CONFIG_USER_ONLY)
        /* if user mode only, we simulate a fake exception
           which will be handled outside the cpu execution
           loop */
#if defined(TARGET_I386)
        CPUClass *cc = CPU_GET_CLASS(cpu);
        cc->tcg_ops->fake_user_interrupt(cpu);
#endif /* TARGET_I386 */
        *ret = cpu->exception_index;
        cpu->exception_index = -1;
        return true;
#else
        if (replay_exception()) {
            CPUClass *cc = CPU_GET_CLASS(cpu);
            qemu_mutex_lock_iothread();
            cc->tcg_ops->do_interrupt(cpu);
            qemu_mutex_unlock_iothread();
            cpu->exception_index = -1;

            if (unlikely(cpu->singlestep_enabled)) {
                /*
                 * After processing the exception, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                *ret = EXCP_DEBUG;
                cpu_handle_debug_exception(cpu);
                return true;
            }
        } else if (!replay_has_interrupt()) {
            /* give a chance to iothread in replay mode */
            *ret = EXCP_INTERRUPT;
            return true;
        }
#endif
    }

    return false;
}

#ifndef CONFIG_USER_ONLY
/*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later. It does not need to be recorded for
 * replay purposes.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return !(interrupt_request & CPU_INTERRUPT_POLL);
#else
    return true;
#endif
}
#endif /* !CONFIG_USER_ONLY */

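/*
 * Service pending interrupt and exit requests.  Returns true when the
 * inner execution loop must break out (an exception index or exit request
 * is pending); returns false to keep executing TBs.  *last_tb is cleared
 * whenever chaining to the previous TB is no longer valid.
 */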
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here. Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit())
     */
    qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        qemu_mutex_lock_iothread();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if defined(TARGET_I386)
        else if (interrupt_request & CPU_INTERRUPT_INIT) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUArchState *env = &x86_cpu->env;
            replay_interrupt();
            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
            do_cpu_init(x86_cpu);
            cpu->exception_index = EXCP_HALTED;
            qemu_mutex_unlock_iothread();
            return true;
        }
#else
        else if (interrupt_request & CPU_INTERRUPT_RESET) {
            replay_interrupt();
            cpu_reset(cpu);
            qemu_mutex_unlock_iothread();
            return true;
        }
#endif /* !TARGET_I386 */
        /* The target hook has 3 exit conditions:
           False when the interrupt isn't processed,
           True when it is, and we should restart on a new TB,
           and a third, via longjmp through cpu_loop_exit.  */
        else {
            CPUClass *cc = CPU_GET_CLASS(cpu);

            if (cc->tcg_ops->cpu_exec_interrupt &&
                cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                if (unlikely(cpu->singlestep_enabled)) {
                    cpu->exception_index = EXCP_DEBUG;
                    qemu_mutex_unlock_iothread();
                    return true;
                }
                cpu->exception_index = -1;
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        qemu_mutex_unlock_iothread();
    }

    /* Finally, check if we need to exit to the main loop.  */
    if (unlikely(qatomic_read(&cpu->exit_request))
        || (icount_enabled()
            && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
        qatomic_set(&cpu->exit_request, 0);
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}

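/*
 * Execute one TB, record it as *last_tb for possible chaining, and when
 * the instruction counter expires refill the icount decrementer (forcing
 * the next TB to contain exactly the remaining instructions if needed).
 */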
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    TranslationBlock **last_tb, int *tb_exit)
{
    int32_t insns_left;

    trace_exec_tb(tb, tb->pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    insns_left = qatomic_read(&cpu_neg(cpu)->icount_decr.u32);
    if (insns_left < 0) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt.  cpu_handle_interrupt will also
         * clear cpu->icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired.  */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution.  */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount)  {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}

/* main execution loop */

int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    rcu_read_lock();

    cpu_exec_enter(cpu);

    /* Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0. As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    /* prepare setjmp context for exception handling */
    if (sigsetjmp(cpu->jmp_env, 0) != 0) {
#if defined(__clang__)
        /*
         * Some compilers wrongly smash all local variables after
         * siglongjmp (the spec requires that only non-volatile locals
         * which are changed between the sigsetjmp and siglongjmp are
         * permitted to be trashed). There were bug reports for gcc
         * 4.5.0 and clang.  The bug is fixed in all versions of gcc
         * that we support, but is still unfixed in clang:
         *   https://bugs.llvm.org/show_bug.cgi?id=21183
         *
         * Reload an essential local variable here for those compilers.
         * Newer versions of gcc would complain about this code (-Wclobbered),
         * so we only perform the workaround for clang.
         */
        cpu = current_cpu;
#else
        /* Non-buggy compilers preserve this; assert the correct value. */
        g_assert(cpu == current_cpu);
#endif

#ifndef CONFIG_SOFTMMU
        clear_helper_retaddr();
        tcg_debug_assert(!have_mmap_lock());
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        qemu_plugin_disable_mem_helpers(cpu);

        assert_no_pages_locked();
    }

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            target_ulong cs_base, pc;
            uint32_t flags, cflags;

            cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            cflags = cpu->cflags_next_tb;
            if (cflags == -1) {
                cflags = curr_cflags(cpu);
            } else {
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, pc, &cflags)) {
                break;
            }

            tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
            if (tb == NULL) {
                mmap_lock();
                tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                mmap_unlock();
                /*
                 * Add the TB to the virtual-pc hash table for fast lookup.
                 */
                qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb->page_addr[1] != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);

            /* Try to align the host and virtual clocks
               if the guest is in advance */
            align_clocks(&sc, cpu);
        }
    }

    cpu_exec_exit(cpu);
    rcu_read_unlock();

    return ret;
}

void tcg_exec_realizefn(CPUState *cpu, Error **errp)
{
    static bool tcg_target_initialized;
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!tcg_target_initialized) {
        cc->tcg_ops->initialize();
        tcg_target_initialized = true;
    }
    tlb_init(cpu);
    qemu_plugin_vcpu_init_hook(cpu);

#ifndef CONFIG_USER_ONLY
    tcg_iommu_init_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */
}

/* undo the initializations in reverse order */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    qemu_plugin_vcpu_exit_hook(cpu);
    tlb_destroy(cpu);
}

#ifndef CONFIG_USER_ONLY

static void dump_drift_info(GString *buf)
{
    if (!icount_enabled()) {
        return;
    }

    g_string_append_printf(buf, "Host - Guest clock  %"PRIi64" ms\n",
                           (cpu_get_clock() - icount_get()) / SCALE_MS);
    if (icount_align_option) {
        g_string_append_printf(buf, "Max guest delay     %"PRIi64" ms\n",
                               -max_delay / SCALE_MS);
        g_string_append_printf(buf, "Max guest advance   %"PRIi64" ms\n",
                               max_advance / SCALE_MS);
    } else {
        g_string_append_printf(buf, "Max guest delay     NA\n");
        g_string_append_printf(buf, "Max guest advance   NA\n");
    }
}

HumanReadableText *qmp_x_query_jit(Error **errp)
{
    g_autoptr(GString) buf = g_string_new("");

    if (!tcg_enabled()) {
        error_setg(errp, "JIT information is only available with accel=tcg");
        return NULL;
    }

    dump_exec_info(buf);
    dump_drift_info(buf);

    return human_readable_text_from_str(buf);
}

HumanReadableText *qmp_x_query_opcount(Error **errp)
{
    g_autoptr(GString) buf = g_string_new("");

    if (!tcg_enabled()) {
        error_setg(errp, "Opcode count information is only available with accel=tcg");
        return NULL;
    }

    tcg_dump_op_count(buf);

    return human_readable_text_from_str(buf);
}

#ifdef CONFIG_PROFILER

int64_t dev_time;

HumanReadableText *qmp_x_query_profile(Error **errp)
{
    g_autoptr(GString) buf = g_string_new("");
    static int64_t last_cpu_exec_time;
    int64_t cpu_exec_time;
    int64_t delta;

    cpu_exec_time = tcg_cpu_exec_time();
    delta = cpu_exec_time - last_cpu_exec_time;

    g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
                           dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
    g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
                           delta, delta / (double)NANOSECONDS_PER_SECOND);
    last_cpu_exec_time = cpu_exec_time;
    dev_time = 0;

    return human_readable_text_from_str(buf);
}
#else
HumanReadableText *qmp_x_query_profile(Error **errp)
{
    error_setg(errp, "Internal profiler not compiled");
    return NULL;
}
#endif

#endif /* !CONFIG_USER_ONLY */