qemu/accel/tcg/cpu-exec.c
/*
 *  emulator main execution loop
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/qemu-print.h"
#include "qapi/error.h"
#include "qapi/type-helpers.h"
#include "hw/core/tcg-cpu-ops.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h"
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "exec/replay-core.h"
#include "sysemu/tcg.h"
#include "exec/helper-proto.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* -icount align implementation. */

typedef struct SyncClocks {
    int64_t diff_clk;
    int64_t last_cpu_icount;
    int64_t realtime_clock;
} SyncClocks;

#if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
#define THRESHOLD_REDUCE 1.5
#define MAX_DELAY_PRINT_RATE 2000000000LL
#define MAX_NB_PRINTS 100

int64_t max_delay;
int64_t max_advance;

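/*
 * If icount alignment is enabled and the guest clock has run ahead of
 * the host clock, sleep the host thread so the two stay roughly in sync.
 */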
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t cpu_icount;

    if (!icount_align_option) {
        return;
    }

    cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
    sc->last_cpu_icount = cpu_icount;

    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
#ifndef _WIN32
        struct timespec sleep_delay, rem_delay;
        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
#else
        Sleep(sc->diff_clk / SCALE_MS);
        sc->diff_clk = 0;
#endif
    }
}

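/*
 * Rate-limited warning when the guest is running late relative to the
 * host clock: printed at most once per MAX_DELAY_PRINT_RATE ns and at
 * most MAX_NB_PRINTS times.
 */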
static void print_delay(const SyncClocks *sc)
{
    static float threshold_delay;
    static int64_t last_realtime_clock;
    static int nb_prints;

    if (icount_align_option &&
        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
        nb_prints < MAX_NB_PRINTS) {
        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
            (-sc->diff_clk / (float)1000000000LL <
             (threshold_delay - THRESHOLD_REDUCE))) {
            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
            qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
                        threshold_delay - 1,
                        threshold_delay);
            nb_prints++;
            last_realtime_clock = sc->realtime_clock;
        }
    }
}

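/*
 * Record the initial difference between QEMU_CLOCK_VIRTUAL and
 * QEMU_CLOCK_VIRTUAL_RT, and update the max_delay/max_advance statistics.
 */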
static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    if (!icount_align_option) {
        return;
    }
    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
    sc->last_cpu_icount
        = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
    if (sc->diff_clk < max_delay) {
        max_delay = sc->diff_clk;
    }
    if (sc->diff_clk > max_advance) {
        max_advance = sc->diff_clk;
    }

    /* Print at most every 2s if the guest is late. We limit the number
       of printed messages to MAX_NB_PRINTS (currently 100). */
    print_delay(sc);
}
#else
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}

static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
#endif /* !CONFIG_USER_ONLY */

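/*
 * Compute the cflags to use for the next TB: the per-vCPU base flags
 * plus adjustments for gdb single-step, -singlestep and -d nochain.
 */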
uint32_t curr_cflags(CPUState *cpu)
{
    uint32_t cflags = cpu->tcg_cflags;

    /*
     * Record gdb single-step.  We should be exiting the TB by raising
     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
     *
     * For singlestep and -d nochain, suppress goto_tb so that
     * we can log -d cpu,exec after every TB.
     */
    if (unlikely(cpu->singlestep_enabled)) {
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
    } else if (singlestep) {
        cflags |= CF_NO_GOTO_TB | 1;
    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
        cflags |= CF_NO_GOTO_TB;
    }

    return cflags;
}

struct tb_desc {
    target_ulong pc;
    target_ulong cs_base;
    CPUArchState *env;
    tb_page_addr_t page_addr0;
    uint32_t flags;
    uint32_t cflags;
    uint32_t trace_vcpu_dstate;
};

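/*
 * qht comparison callback: return true if TB @p matches the lookup key
 * described by the struct tb_desc @d, including a check of the second
 * physical page for TBs that cross a page boundary.
 */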
static bool tb_lookup_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->pc) &&
        tb_page_addr0(tb) == desc->page_addr0 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
        tb_cflags(tb) == desc->cflags) {
        /* check next page if needed */
        tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
        if (tb_phys_page1 == -1) {
            return true;
        } else {
            tb_page_addr_t phys_page1;
            target_ulong virt_page1;

            /*
             * We know that the first page matched, and an otherwise valid TB
             * encountered an incomplete instruction at the end of that page,
             * therefore we know that generating a new TB from the current PC
             * must also require reading from the next page -- even if the
             * second pages do not match, and therefore the resulting insn
             * is different for the new TB.  Therefore any exception raised
             * here by the faulting lookup is not premature.
             */
            virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
            phys_page1 = get_page_addr_code(desc->env, virt_page1);
            if (tb_phys_page1 == phys_page1) {
                return true;
            }
        }
    }
    return false;
}

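/*
 * Look up a TB in the global hash table, keyed on physical PC, flags and
 * cflags.  Return NULL if the code page is unmapped or no TB matches.
 */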
static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                          target_ulong cs_base, uint32_t flags,
                                          uint32_t cflags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.cflags = cflags;
    desc.trace_vcpu_dstate = *cpu->trace_dstate;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    if (phys_pc == -1) {
        return NULL;
    }
    desc.page_addr0 = phys_pc;
    h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
                     flags, cflags, *cpu->trace_dstate);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                          target_ulong cs_base,
                                          uint32_t flags, uint32_t cflags)
{
    TranslationBlock *tb;
    CPUJumpCache *jc;
    uint32_t hash;

    /* we should never be trying to look up an INVALID tb */
    tcg_debug_assert(!(cflags & CF_INVALID));

    hash = tb_jmp_cache_hash_func(pc);
    jc = cpu->tb_jmp_cache;

    if (cflags & CF_PCREL) {
        /* Use acquire to ensure current load of pc from jc. */
        tb = qatomic_load_acquire(&jc->array[hash].tb);

        if (likely(tb &&
                   jc->array[hash].pc == pc &&
                   tb->cs_base == cs_base &&
                   tb->flags == flags &&
                   tb->trace_vcpu_dstate == *cpu->trace_dstate &&
                   tb_cflags(tb) == cflags)) {
            return tb;
        }
        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            return NULL;
        }
        jc->array[hash].pc = pc;
        /* Ensure pc is written first. */
        qatomic_store_release(&jc->array[hash].tb, tb);
    } else {
        /* Use rcu_read to ensure current load of pc from *tb. */
        tb = qatomic_rcu_read(&jc->array[hash].tb);

        if (likely(tb &&
                   tb->pc == pc &&
                   tb->cs_base == cs_base &&
                   tb->flags == flags &&
                   tb->trace_vcpu_dstate == *cpu->trace_dstate &&
                   tb_cflags(tb) == cflags)) {
            return tb;
        }
        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            return NULL;
        }
        /* Use the pc value already stored in tb->pc. */
        qatomic_set(&jc->array[hash].tb, tb);
    }

    return tb;
}

static void log_cpu_exec(target_ulong pc, CPUState *cpu,
                         const TranslationBlock *tb)
{
    if (qemu_log_in_addr_range(pc)) {
        qemu_log_mask(CPU_LOG_EXEC,
                      "Trace %d: %p [" TARGET_FMT_lx
                      "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
                      cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                      tb->flags, tb->cflags, lookup_symbol(pc));

#if defined(DEBUG_DISAS)
        if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                int flags = 0;

                if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
                    flags |= CPU_DUMP_FPU;
                }
#if defined(TARGET_I386)
                flags |= CPU_DUMP_CCOP;
#endif
                cpu_dump_state(cpu, logfile, flags);
                qemu_log_unlock(logfile);
            }
        }
#endif /* DEBUG_DISAS */
    }
}

static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
                                       uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool match_page = false;

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        /*
         * If we have an exact pc match, trigger the breakpoint.
         * Otherwise, note matches within the page.
         */
        if (pc == bp->pc) {
            bool match_bp = false;

            if (bp->flags & BP_GDB) {
                match_bp = true;
            } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
                g_assert_not_reached();
#else
                CPUClass *cc = CPU_GET_CLASS(cpu);
                assert(cc->tcg_ops->debug_check_breakpoint);
                match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
#endif
            }

            if (match_bp) {
                cpu->exception_index = EXCP_DEBUG;
                return true;
            }
        } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
            match_page = true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (match_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
    }
    return false;
}

static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
                                         uint32_t *cflags)
{
    return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
        check_for_breakpoints_slow(cpu, pc, cflags);
}

/**
 * helper_lookup_tb_ptr: quick check for next tb
 * @env: current cpu state
 *
 * Look for an existing TB matching the current cpu state.
 * If found, return the code pointer.  If not found, return
 * the tcg epilogue so that we return into cpu_tb_exec.
 */
const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    cflags = curr_cflags(cpu);
    if (check_for_breakpoints(cpu, pc, &cflags)) {
        cpu_loop_exit(cpu);
    }

    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return tcg_code_gen_epilogue;
    }

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(pc, cpu, tb);
    }

    return tb->tc.ptr;
}

/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call. Since such a function did not exist at compile
 * time, the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU, so this does not
 * affect the impact of CFI in environments with high security requirements.
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    CPUArchState *env = cpu->env_ptr;
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(log_pc(cpu, itb), cpu, itb);
    }

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(env, tb_ptr);
    cpu->can_do_io = 1;
    qemu_plugin_disable_mem_helpers(cpu);
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = CPU_GET_CLASS(cpu);

        if (cc->tcg_ops->synchronize_from_tb) {
            cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
        if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
            target_ulong pc = log_pc(cpu, last_tb);
            if (qemu_log_in_addr_range(pc)) {
                qemu_log("Stopped execution of TB chain before %p ["
                         TARGET_FMT_lx "] %s\n",
                         last_tb->tc.ptr, pc, lookup_symbol(pc));
            }
        }
    }

    /*
     * If gdb single-step, and we haven't raised another exception,
     * raise a debug exception.  Single-step with another exception
     * is handled in cpu_handle_exception.
     */
    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
        cpu->exception_index = EXCP_DEBUG;
        cpu_loop_exit(cpu);
    }

    return last_tb;
}


static void cpu_exec_enter(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_enter) {
        cc->tcg_ops->cpu_exec_enter(cpu);
    }
}

static void cpu_exec_exit(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_exit) {
        cc->tcg_ops->cpu_exec_exit(cpu);
    }
}

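/*
 * Execute exactly one guest instruction inside the exclusive region.
 * This is the fallback path for atomic operations that cannot be
 * emulated while other vCPUs run in parallel.
 */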
void cpu_exec_step_atomic(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags, cflags;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

        cflags = curr_cflags(cpu);
        /* Execute in a serial context. */
        cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
#ifndef CONFIG_SOFTMMU
        clear_helper_retaddr();
        if (have_mmap_lock()) {
            mmap_unlock();
        }
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
        assert_no_pages_locked();
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}

void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    /*
     * Get the rx view of the structure, from which we find the
     * executable code address, and tb_target_set_jmp_target can
     * produce a pc-relative displacement to jmp_target_addr[n].
     */
    const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb);
    uintptr_t offset = tb->jmp_insn_offset[n];
    uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset;
    uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;

    tb->jmp_target_addr[n] = addr;
    tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw);
}

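/*
 * Chain exit @n of @tb directly to @tb_next by patching the generated
 * code, provided @tb_next is still valid and the slot is unclaimed.
 */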
static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
                  tb->tc.ptr, n, tb_next->tc.ptr);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
    return;
}

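/*
 * In system emulation, return true if the vCPU is halted and has no
 * pending work; otherwise clear the halted state and continue.
 */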
static inline bool cpu_handle_halt(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
#if defined(TARGET_I386)
        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
            apic_poll_irq(x86_cpu->apic_state);
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
#endif /* TARGET_I386 */
        if (!cpu_has_work(cpu)) {
            return true;
        }

        cpu->halted = 0;
    }
#endif /* !CONFIG_USER_ONLY */

    return false;
}

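/*
 * Clear any stale watchpoint-hit flags and give the target a chance to
 * run its own debug exception handler.
 */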
static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (cc->tcg_ops->debug_excp_handler) {
        cc->tcg_ops->debug_excp_handler(cpu);
    }
}

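/*
 * Handle a pending exception, if any.  Returns true, with *ret set,
 * when the outer execution loop should return to its caller.
 */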
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
                | CF_NOIRQ | 1;
        }
#endif
        return false;
    }
    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    } else {
#if defined(CONFIG_USER_ONLY)
        /* if user mode only, we simulate a fake exception
           which will be handled outside the cpu execution
           loop */
#if defined(TARGET_I386)
        CPUClass *cc = CPU_GET_CLASS(cpu);
        cc->tcg_ops->fake_user_interrupt(cpu);
#endif /* TARGET_I386 */
        *ret = cpu->exception_index;
        cpu->exception_index = -1;
        return true;
#else
        if (replay_exception()) {
            CPUClass *cc = CPU_GET_CLASS(cpu);
            qemu_mutex_lock_iothread();
            cc->tcg_ops->do_interrupt(cpu);
            qemu_mutex_unlock_iothread();
            cpu->exception_index = -1;

            if (unlikely(cpu->singlestep_enabled)) {
                /*
                 * After processing the exception, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                *ret = EXCP_DEBUG;
                cpu_handle_debug_exception(cpu);
                return true;
            }
        } else if (!replay_has_interrupt()) {
            /* give a chance to iothread in replay mode */
            *ret = EXCP_INTERRUPT;
            return true;
        }
#endif
    }

    return false;
}

#ifndef CONFIG_USER_ONLY
/*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later. It does not need to be recorded for
 * replay purposes.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return !(interrupt_request & CPU_INTERRUPT_POLL);
#else
    return true;
#endif
}
#endif /* !CONFIG_USER_ONLY */

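/*
 * Service pending interrupt and exit requests.  Returns true when the
 * inner execution loop should stop and go back to the main loop.
 */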
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here. Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit())
     */
    qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        qemu_mutex_lock_iothread();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if defined(TARGET_I386)
        else if (interrupt_request & CPU_INTERRUPT_INIT) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUArchState *env = &x86_cpu->env;
            replay_interrupt();
            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
            do_cpu_init(x86_cpu);
            cpu->exception_index = EXCP_HALTED;
            qemu_mutex_unlock_iothread();
            return true;
        }
#else
        else if (interrupt_request & CPU_INTERRUPT_RESET) {
            replay_interrupt();
            cpu_reset(cpu);
            qemu_mutex_unlock_iothread();
            return true;
        }
#endif /* !TARGET_I386 */
        /* The target hook has 3 exit conditions:
           false when the interrupt isn't processed,
           true when it is, and we should restart on a new TB,
           or no return at all, exiting via longjmp through
           cpu_loop_exit.  */
        else {
            CPUClass *cc = CPU_GET_CLASS(cpu);

            if (cc->tcg_ops->cpu_exec_interrupt &&
                cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                if (unlikely(cpu->singlestep_enabled)) {
                    cpu->exception_index = EXCP_DEBUG;
                    qemu_mutex_unlock_iothread();
                    return true;
                }
                cpu->exception_index = -1;
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        qemu_mutex_unlock_iothread();
    }

    /* Finally, check if we need to exit to the main loop.  */
    if (unlikely(qatomic_read(&cpu->exit_request))
        || (icount_enabled()
            && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
        qatomic_set(&cpu->exit_request, 0);
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}

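/*
 * Execute a single TB and, when the instruction counter expires, refill
 * the icount decrementer and constrain the next TB to the budget left.
 */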
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    target_ulong pc,
                                    TranslationBlock **last_tb, int *tb_exit)
{
    int32_t insns_left;

    trace_exec_tb(tb, pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    insns_left = qatomic_read(&cpu_neg(cpu)->icount_decr.u32);
    if (insns_left < 0) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt.  cpu_handle_interrupt will also
         * clear cpu->icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired.  */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution.  */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount)  {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}

/* main execution loop */

static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
{
    int ret;

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            target_ulong cs_base, pc;
            uint32_t flags, cflags;

            cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            cflags = cpu->cflags_next_tb;
            if (cflags == -1) {
                cflags = curr_cflags(cpu);
            } else {
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, pc, &cflags)) {
                break;
            }

            tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
            if (tb == NULL) {
                CPUJumpCache *jc;
                uint32_t h;

                mmap_lock();
                tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                mmap_unlock();

                /*
                 * Add the TB to the virtual-pc hash table
                 * for fast lookup.
                 */
                h = tb_jmp_cache_hash_func(pc);
                jc = cpu->tb_jmp_cache;
                if (cflags & CF_PCREL) {
                    jc->array[h].pc = pc;
                    /* Ensure pc is written first. */
                    qatomic_store_release(&jc->array[h].tb, tb);
                } else {
                    /* Use the pc value already stored in tb->pc. */
                    qatomic_set(&jc->array[h].tb, tb);
                }
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb_page_addr1(tb) != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);

            /* Try to align the host and virtual clocks
               if the guest is ahead of the host. */
            align_clocks(sc, cpu);
        }
    }
    return ret;
}

static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
{
    /* Prepare setjmp context for exception handling. */
    if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
        /* Non-buggy compilers preserve this; assert the correct value. */
        g_assert(cpu == current_cpu);

#ifndef CONFIG_SOFTMMU
        clear_helper_retaddr();
        if (have_mmap_lock()) {
            mmap_unlock();
        }
#endif
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }

        assert_no_pages_locked();
    }

    return cpu_exec_loop(cpu, sc);
}

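/*
 * Main entry point: run the vCPU until it leaves the execution loop
 * (halt, exception, interrupt or an explicit exit request).
 */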
int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    rcu_read_lock();
    cpu_exec_enter(cpu);

    /*
     * Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0. As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    ret = cpu_exec_setjmp(cpu, &sc);

    cpu_exec_exit(cpu);
    rcu_read_unlock();

    return ret;
}

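/*
 * Per-vCPU TCG initialization: TCG target (once per process), the TB
 * jump cache, the TLB and, in system emulation, the IOMMU notifier list.
 */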
void tcg_exec_realizefn(CPUState *cpu, Error **errp)
{
    static bool tcg_target_initialized;
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!tcg_target_initialized) {
        cc->tcg_ops->initialize();
        tcg_target_initialized = true;
    }

    cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
    tlb_init(cpu);
#ifndef CONFIG_USER_ONLY
    tcg_iommu_init_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */
    /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */
}

/* undo the initializations in reverse order */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    tlb_destroy(cpu);
    g_free_rcu(cpu->tb_jmp_cache, rcu);
}