qemu/accel/tcg/translate-all.c
   1/*
   2 *  Host code generation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu-common.h"
  22
  23#define NO_CPU_IO_DEFS
  24#include "trace.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg/tcg.h"
  28#if defined(CONFIG_USER_ONLY)
  29#include "qemu.h"
  30#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  31#include <sys/param.h>
  32#if __FreeBSD_version >= 700104
  33#define HAVE_KINFO_GETVMMAP
  34#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  35#include <sys/proc.h>
  36#include <machine/profile.h>
  37#define _KERNEL
  38#include <sys/user.h>
  39#undef _KERNEL
  40#undef sigqueue
  41#include <libutil.h>
  42#endif
  43#endif
  44#else
  45#include "exec/ram_addr.h"
  46#endif
  47
  48#include "exec/cputlb.h"
  49#include "exec/translate-all.h"
  50#include "qemu/bitmap.h"
  51#include "qemu/qemu-print.h"
  52#include "qemu/timer.h"
  53#include "qemu/main-loop.h"
  54#include "exec/log.h"
  55#include "sysemu/cpus.h"
  56#include "sysemu/cpu-timers.h"
  57#include "sysemu/tcg.h"
  58#include "qapi/error.h"
  59#include "hw/core/tcg-cpu-ops.h"
  60#include "tb-hash.h"
  61#include "tb-context.h"
  62#include "internal.h"
  63
  64/* #define DEBUG_TB_INVALIDATE */
  65/* #define DEBUG_TB_FLUSH */
  66/* make various TB consistency checks */
  67/* #define DEBUG_TB_CHECK */
  68
  69#ifdef DEBUG_TB_INVALIDATE
  70#define DEBUG_TB_INVALIDATE_GATE 1
  71#else
  72#define DEBUG_TB_INVALIDATE_GATE 0
  73#endif
  74
  75#ifdef DEBUG_TB_FLUSH
  76#define DEBUG_TB_FLUSH_GATE 1
  77#else
  78#define DEBUG_TB_FLUSH_GATE 0
  79#endif
  80
  81#if !defined(CONFIG_USER_ONLY)
  82/* TB consistency checks only implemented for usermode emulation.  */
  83#undef DEBUG_TB_CHECK
  84#endif
  85
  86#ifdef DEBUG_TB_CHECK
  87#define DEBUG_TB_CHECK_GATE 1
  88#else
  89#define DEBUG_TB_CHECK_GATE 0
  90#endif
  91
   92/* Access to the various translation structures needs to be serialised via locks
   93 * for consistency.
   94 * In user-mode emulation, access to the memory-related structures is protected
   95 * with mmap_lock.
  96 * In !user-mode we use per-page locks.
  97 */
  98#ifdef CONFIG_SOFTMMU
  99#define assert_memory_lock()
 100#else
 101#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
 102#endif
 103
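/*
 * Number of code-invalidating writes to a page after which its code_bitmap
 * is built, so that later writes that miss the bitmap need not flush TBs.
 */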
 104#define SMC_BITMAP_USE_THRESHOLD 10
 105
 106typedef struct PageDesc {
 107    /* list of TBs intersecting this ram page */
 108    uintptr_t first_tb;
 109#ifdef CONFIG_SOFTMMU
  110    /* in order to optimize self-modifying code, we count the number of
  111       writes to a given page so we know when to switch to a bitmap */
 112    unsigned long *code_bitmap;
 113    unsigned int code_write_count;
 114#else
 115    unsigned long flags;
 116    void *target_data;
 117#endif
 118#ifndef CONFIG_USER_ONLY
 119    QemuSpin lock;
 120#endif
 121} PageDesc;
 122
 123/**
 124 * struct page_entry - page descriptor entry
 125 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 126 * @index:  page index of the page
 127 * @locked: whether the page is locked
 128 *
 129 * This struct helps us keep track of the locked state of a page, without
 130 * bloating &struct PageDesc.
 131 *
 132 * A page lock protects accesses to all fields of &struct PageDesc.
 133 *
 134 * See also: &struct page_collection.
 135 */
 136struct page_entry {
 137    PageDesc *pd;
 138    tb_page_addr_t index;
 139    bool locked;
 140};
 141
 142/**
 143 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 144 * @tree:   Binary search tree (BST) of the pages, with key == page index
 145 * @max:    Pointer to the page in @tree with the highest page index
 146 *
 147 * To avoid deadlock we lock pages in ascending order of page index.
 148 * When operating on a set of pages, we need to keep track of them so that
 149 * we can lock them in order and also unlock them later. For this we collect
 150 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 151 * @tree implementation we use does not provide an O(1) operation to obtain the
 152 * highest-ranked element, we use @max to keep track of the inserted page
 153 * with the highest index. This is valuable because if a page is not in
 154 * the tree and its index is higher than @max's, then we can lock it
 155 * without breaking the locking order rule.
 156 *
 157 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 158 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 159 *
 160 * See also: page_collection_lock().
 161 */
 162struct page_collection {
 163    GTree *tree;
 164    struct page_entry *max;
 165};
 166
 167/* list iterators for lists of tagged pointers in TranslationBlock */
 168#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
 169    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
 170         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
 171             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
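/*
 * Note: the low bit of each link is a tag recording which of the two per-TB
 * slots (page_next[0]/[1], or jmp_list_next[0]/[1]) the list continues
 * through, so a single TB can sit on two lists at once, e.g. one list per
 * physical page it spans.
 */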
 172
 173#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
 174    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
 175
 176#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
 177    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 178
 179/*
 180 * In system mode we want L1_MAP to be based on ram offsets,
 181 * while in user mode we want it to be based on virtual addresses.
 182 *
 183 * TODO: For user mode, see the caveat re host vs guest virtual
 184 * address spaces near GUEST_ADDR_MAX.
 185 */
 186#if !defined(CONFIG_USER_ONLY)
 187#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 188# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 189#else
 190# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 191#endif
 192#else
 193# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
 194#endif
 195
 196/* Size of the L2 (and L3, etc) page tables.  */
 197#define V_L2_BITS 10
 198#define V_L2_SIZE (1 << V_L2_BITS)
 199
 200/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
 201QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
 202                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
 203                  * BITS_PER_BYTE);
 204
 205/*
 206 * L1 Mapping properties
 207 */
 208static int v_l1_size;
 209static int v_l1_shift;
 210static int v_l2_levels;
 211
 212/* The bottom level has pointers to PageDesc, and is indexed by
 213 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 214 */
 215#define V_L1_MIN_BITS 4
 216#define V_L1_MAX_BITS (V_L2_BITS + 3)
 217#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
 218
 219static void *l1_map[V_L1_MAX_SIZE];
 220
 221TBContext tb_ctx;
 222
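/*
 * Compute the geometry of the l1_map radix tree at run time: the size of the
 * top level (v_l1_size/v_l1_shift) and the number of intermediate
 * V_L2_BITS-wide pointer levels (v_l2_levels) needed to cover
 * L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS bits of page index.  This is done
 * at run time because TARGET_PAGE_BITS may be variable on some targets.
 */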
 223static void page_table_config_init(void)
 224{
 225    uint32_t v_l1_bits;
 226
 227    assert(TARGET_PAGE_BITS);
 228    /* The bits remaining after N lower levels of page tables.  */
 229    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
 230    if (v_l1_bits < V_L1_MIN_BITS) {
 231        v_l1_bits += V_L2_BITS;
 232    }
 233
 234    v_l1_size = 1 << v_l1_bits;
 235    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
 236    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
 237
 238    assert(v_l1_bits <= V_L1_MAX_BITS);
 239    assert(v_l1_shift % V_L2_BITS == 0);
 240    assert(v_l2_levels >= 0);
 241}
 242
 243/* Encode VAL as a signed leb128 sequence at P.
 244   Return P incremented past the encoded value.  */
 245static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 246{
 247    int more, byte;
 248
 249    do {
 250        byte = val & 0x7f;
 251        val >>= 7;
 252        more = !((val == 0 && (byte & 0x40) == 0)
 253                 || (val == -1 && (byte & 0x40) != 0));
 254        if (more) {
 255            byte |= 0x80;
 256        }
 257        *p++ = byte;
 258    } while (more);
 259
 260    return p;
 261}
 262
 263/* Decode a signed leb128 sequence at *PP; increment *PP past the
 264   decoded value.  Return the decoded value.  */
 265static target_long decode_sleb128(const uint8_t **pp)
 266{
 267    const uint8_t *p = *pp;
 268    target_long val = 0;
 269    int byte, shift = 0;
 270
 271    do {
 272        byte = *p++;
 273        val |= (target_ulong)(byte & 0x7f) << shift;
 274        shift += 7;
 275    } while (byte & 0x80);
 276    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
 277        val |= -(target_ulong)1 << shift;
 278    }
 279
 280    *pp = p;
 281    return val;
 282}
 283
 284/* Encode the data collected about the instructions while compiling TB.
 285   Place the data at BLOCK, and return the number of bytes consumed.
 286
 287   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
 288   which come from the target's insn_start data, followed by a uintptr_t
 289   which comes from the host pc of the end of the code implementing the insn.
 290
 291   Each line of the table is encoded as sleb128 deltas from the previous
 292   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
 293   That is, the first column is seeded with the guest pc, the last column
 294   with the host pc, and the middle columns with zeros.  */
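/*
 * Illustration (not from the source): with TARGET_INSN_START_WORDS == 1, a TB
 * of three guest insns at pc, pc + 4 and pc + 8, whose generated host code
 * ends at offsets 0x20, 0x48 and 0x60 from tc.ptr, is encoded as the sleb128
 * stream 0, 0x20, 4, 0x28, 4, 0x18; each row is stored as deltas from the
 * previous row (the first row from the seed).
 */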
 295
 296static int encode_search(TranslationBlock *tb, uint8_t *block)
 297{
 298    uint8_t *highwater = tcg_ctx->code_gen_highwater;
 299    uint8_t *p = block;
 300    int i, j, n;
 301
 302    for (i = 0, n = tb->icount; i < n; ++i) {
 303        target_ulong prev;
 304
 305        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 306            if (i == 0) {
 307                prev = (j == 0 ? tb->pc : 0);
 308            } else {
 309                prev = tcg_ctx->gen_insn_data[i - 1][j];
 310            }
 311            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
 312        }
 313        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
 314        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
 315
 316        /* Test for (pending) buffer overflow.  The assumption is that any
 317           one row beginning below the high water mark cannot overrun
 318           the buffer completely.  Thus we can test for overflow after
 319           encoding a row without having to check during encoding.  */
 320        if (unlikely(p > highwater)) {
 321            return -1;
 322        }
 323    }
 324
 325    return p - block;
 326}
 327
 328/* The cpu state corresponding to 'searched_pc' is restored.
  329 * When reset_icount is true, the current TB will be interrupted and
 330 * icount should be recalculated.
 331 */
 332static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 333                                     uintptr_t searched_pc, bool reset_icount)
 334{
 335    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
 336    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
 337    CPUArchState *env = cpu->env_ptr;
 338    const uint8_t *p = tb->tc.ptr + tb->tc.size;
 339    int i, j, num_insns = tb->icount;
 340#ifdef CONFIG_PROFILER
 341    TCGProfile *prof = &tcg_ctx->prof;
 342    int64_t ti = profile_getclock();
 343#endif
 344
 345    searched_pc -= GETPC_ADJ;
 346
 347    if (searched_pc < host_pc) {
 348        return -1;
 349    }
 350
 351    /* Reconstruct the stored insn data while looking for the point at
 352       which the end of the insn exceeds the searched_pc.  */
 353    for (i = 0; i < num_insns; ++i) {
 354        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 355            data[j] += decode_sleb128(&p);
 356        }
 357        host_pc += decode_sleb128(&p);
 358        if (host_pc > searched_pc) {
 359            goto found;
 360        }
 361    }
 362    return -1;
 363
 364 found:
 365    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
 366        assert(icount_enabled());
 367        /* Reset the cycle counter to the start of the block
  368           and shift it to the number of actually executed instructions */
 369        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
 370    }
 371    restore_state_to_opc(env, tb, data);
 372
 373#ifdef CONFIG_PROFILER
 374    qatomic_set(&prof->restore_time,
 375                prof->restore_time + profile_getclock() - ti);
 376    qatomic_set(&prof->restore_count, prof->restore_count + 1);
 377#endif
 378    return 0;
 379}
 380
 381bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
 382{
 383    /*
 384     * The host_pc has to be in the rx region of the code buffer.
  385     * If it is not, we will not be able to resolve it here.
 386     * The two cases where host_pc will not be correct are:
 387     *
 388     *  - fault during translation (instruction fetch)
 389     *  - fault from helper (not using GETPC() macro)
 390     *
  391     * Either way we need to return early as we can't resolve it here.
 392     */
 393    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 394        TranslationBlock *tb = tcg_tb_lookup(host_pc);
 395        if (tb) {
 396            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
 397            return true;
 398        }
 399    }
 400    return false;
 401}
 402
 403void page_init(void)
 404{
 405    page_size_init();
 406    page_table_config_init();
 407
 408#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 409    {
 410#ifdef HAVE_KINFO_GETVMMAP
 411        struct kinfo_vmentry *freep;
 412        int i, cnt;
 413
 414        freep = kinfo_getvmmap(getpid(), &cnt);
 415        if (freep) {
 416            mmap_lock();
 417            for (i = 0; i < cnt; i++) {
 418                unsigned long startaddr, endaddr;
 419
 420                startaddr = freep[i].kve_start;
 421                endaddr = freep[i].kve_end;
 422                if (h2g_valid(startaddr)) {
 423                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 424
 425                    if (h2g_valid(endaddr)) {
 426                        endaddr = h2g(endaddr);
 427                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 428                    } else {
 429#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 430                        endaddr = ~0ul;
 431                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 432#endif
 433                    }
 434                }
 435            }
 436            free(freep);
 437            mmap_unlock();
 438        }
 439#else
 440        FILE *f;
 441
 442        last_brk = (unsigned long)sbrk(0);
 443
 444        f = fopen("/compat/linux/proc/self/maps", "r");
 445        if (f) {
 446            mmap_lock();
 447
 448            do {
 449                unsigned long startaddr, endaddr;
 450                int n;
 451
 452                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 453
 454                if (n == 2 && h2g_valid(startaddr)) {
 455                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 456
 457                    if (h2g_valid(endaddr)) {
 458                        endaddr = h2g(endaddr);
 459                    } else {
 460                        endaddr = ~0ul;
 461                    }
 462                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 463                }
 464            } while (!feof(f));
 465
 466            fclose(f);
 467            mmap_unlock();
 468        }
 469#endif
 470    }
 471#endif
 472}
 473
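/*
 * Walk (and optionally populate) the l1_map radix tree for the given page
 * index.  Intermediate levels and the final PageDesc array are installed with
 * an atomic compare-and-swap, so concurrent allocators are safe and lock-free
 * readers see either NULL or a fully initialized level.
 */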
 474static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 475{
 476    PageDesc *pd;
 477    void **lp;
 478    int i;
 479
 480    /* Level 1.  Always allocated.  */
 481    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
 482
 483    /* Level 2..N-1.  */
 484    for (i = v_l2_levels; i > 0; i--) {
 485        void **p = qatomic_rcu_read(lp);
 486
 487        if (p == NULL) {
 488            void *existing;
 489
 490            if (!alloc) {
 491                return NULL;
 492            }
 493            p = g_new0(void *, V_L2_SIZE);
 494            existing = qatomic_cmpxchg(lp, NULL, p);
 495            if (unlikely(existing)) {
 496                g_free(p);
 497                p = existing;
 498            }
 499        }
 500
 501        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
 502    }
 503
 504    pd = qatomic_rcu_read(lp);
 505    if (pd == NULL) {
 506        void *existing;
 507
 508        if (!alloc) {
 509            return NULL;
 510        }
 511        pd = g_new0(PageDesc, V_L2_SIZE);
 512#ifndef CONFIG_USER_ONLY
 513        {
 514            int i;
 515
 516            for (i = 0; i < V_L2_SIZE; i++) {
 517                qemu_spin_init(&pd[i].lock);
 518            }
 519        }
 520#endif
 521        existing = qatomic_cmpxchg(lp, NULL, pd);
 522        if (unlikely(existing)) {
 523#ifndef CONFIG_USER_ONLY
 524            {
 525                int i;
 526
 527                for (i = 0; i < V_L2_SIZE; i++) {
 528                    qemu_spin_destroy(&pd[i].lock);
 529                }
 530            }
 531#endif
 532            g_free(pd);
 533            pd = existing;
 534        }
 535    }
 536
 537    return pd + (index & (V_L2_SIZE - 1));
 538}
 539
 540static inline PageDesc *page_find(tb_page_addr_t index)
 541{
 542    return page_find_alloc(index, 0);
 543}
 544
 545static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 546                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
 547
 548/* In user-mode page locks aren't used; mmap_lock is enough */
 549#ifdef CONFIG_USER_ONLY
 550
 551#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
 552
 553static inline void page_lock(PageDesc *pd)
 554{ }
 555
 556static inline void page_unlock(PageDesc *pd)
 557{ }
 558
 559static inline void page_lock_tb(const TranslationBlock *tb)
 560{ }
 561
 562static inline void page_unlock_tb(const TranslationBlock *tb)
 563{ }
 564
 565struct page_collection *
 566page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 567{
 568    return NULL;
 569}
 570
 571void page_collection_unlock(struct page_collection *set)
 572{ }
 573#else /* !CONFIG_USER_ONLY */
 574
 575#ifdef CONFIG_DEBUG_TCG
 576
 577static __thread GHashTable *ht_pages_locked_debug;
 578
 579static void ht_pages_locked_debug_init(void)
 580{
 581    if (ht_pages_locked_debug) {
 582        return;
 583    }
 584    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
 585}
 586
 587static bool page_is_locked(const PageDesc *pd)
 588{
 589    PageDesc *found;
 590
 591    ht_pages_locked_debug_init();
 592    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
 593    return !!found;
 594}
 595
 596static void page_lock__debug(PageDesc *pd)
 597{
 598    ht_pages_locked_debug_init();
 599    g_assert(!page_is_locked(pd));
 600    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
 601}
 602
 603static void page_unlock__debug(const PageDesc *pd)
 604{
 605    bool removed;
 606
 607    ht_pages_locked_debug_init();
 608    g_assert(page_is_locked(pd));
 609    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
 610    g_assert(removed);
 611}
 612
 613static void
 614do_assert_page_locked(const PageDesc *pd, const char *file, int line)
 615{
 616    if (unlikely(!page_is_locked(pd))) {
 617        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
 618                     pd, file, line);
 619        abort();
 620    }
 621}
 622
 623#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
 624
 625void assert_no_pages_locked(void)
 626{
 627    ht_pages_locked_debug_init();
 628    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
 629}
 630
 631#else /* !CONFIG_DEBUG_TCG */
 632
 633#define assert_page_locked(pd)
 634
 635static inline void page_lock__debug(const PageDesc *pd)
 636{
 637}
 638
 639static inline void page_unlock__debug(const PageDesc *pd)
 640{
 641}
 642
 643#endif /* CONFIG_DEBUG_TCG */
 644
 645static inline void page_lock(PageDesc *pd)
 646{
 647    page_lock__debug(pd);
 648    qemu_spin_lock(&pd->lock);
 649}
 650
 651static inline void page_unlock(PageDesc *pd)
 652{
 653    qemu_spin_unlock(&pd->lock);
 654    page_unlock__debug(pd);
 655}
 656
 657/* lock the page(s) of a TB in the correct acquisition order */
 658static inline void page_lock_tb(const TranslationBlock *tb)
 659{
 660    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
 661}
 662
 663static inline void page_unlock_tb(const TranslationBlock *tb)
 664{
 665    PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 666
 667    page_unlock(p1);
 668    if (unlikely(tb->page_addr[1] != -1)) {
 669        PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 670
 671        if (p2 != p1) {
 672            page_unlock(p2);
 673        }
 674    }
 675}
 676
 677static inline struct page_entry *
 678page_entry_new(PageDesc *pd, tb_page_addr_t index)
 679{
 680    struct page_entry *pe = g_malloc(sizeof(*pe));
 681
 682    pe->index = index;
 683    pe->pd = pd;
 684    pe->locked = false;
 685    return pe;
 686}
 687
 688static void page_entry_destroy(gpointer p)
 689{
 690    struct page_entry *pe = p;
 691
 692    g_assert(pe->locked);
 693    page_unlock(pe->pd);
 694    g_free(pe);
 695}
 696
 697/* returns false on success */
 698static bool page_entry_trylock(struct page_entry *pe)
 699{
 700    bool busy;
 701
 702    busy = qemu_spin_trylock(&pe->pd->lock);
 703    if (!busy) {
 704        g_assert(!pe->locked);
 705        pe->locked = true;
 706        page_lock__debug(pe->pd);
 707    }
 708    return busy;
 709}
 710
 711static void do_page_entry_lock(struct page_entry *pe)
 712{
 713    page_lock(pe->pd);
 714    g_assert(!pe->locked);
 715    pe->locked = true;
 716}
 717
 718static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
 719{
 720    struct page_entry *pe = value;
 721
 722    do_page_entry_lock(pe);
 723    return FALSE;
 724}
 725
 726static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
 727{
 728    struct page_entry *pe = value;
 729
 730    if (pe->locked) {
 731        pe->locked = false;
 732        page_unlock(pe->pd);
 733    }
 734    return FALSE;
 735}
 736
 737/*
 738 * Trylock a page, and if successful, add the page to a collection.
 739 * Returns true ("busy") if the page could not be locked; false otherwise.
 740 */
 741static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
 742{
 743    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
 744    struct page_entry *pe;
 745    PageDesc *pd;
 746
 747    pe = g_tree_lookup(set->tree, &index);
 748    if (pe) {
 749        return false;
 750    }
 751
 752    pd = page_find(index);
 753    if (pd == NULL) {
 754        return false;
 755    }
 756
 757    pe = page_entry_new(pd, index);
 758    g_tree_insert(set->tree, &pe->index, pe);
 759
 760    /*
 761     * If this is either (1) the first insertion or (2) a page whose index
 762     * is higher than any other so far, just lock the page and move on.
 763     */
 764    if (set->max == NULL || pe->index > set->max->index) {
 765        set->max = pe;
 766        do_page_entry_lock(pe);
 767        return false;
 768    }
 769    /*
  770     * Try to acquire the lock out of order; if it is busy, return busy so
  771     * that we acquire the locks in order.
 772     */
 773    return page_entry_trylock(pe);
 774}
 775
 776static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
 777{
 778    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
 779    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
 780
 781    if (a == b) {
 782        return 0;
 783    } else if (a < b) {
 784        return -1;
 785    }
 786    return 1;
 787}
 788
 789/*
 790 * Lock a range of pages ([@start,@end[) as well as the pages of all
 791 * intersecting TBs.
 792 * Locking order: acquire locks in ascending order of page index.
 793 */
 794struct page_collection *
 795page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 796{
 797    struct page_collection *set = g_malloc(sizeof(*set));
 798    tb_page_addr_t index;
 799    PageDesc *pd;
 800
 801    start >>= TARGET_PAGE_BITS;
 802    end   >>= TARGET_PAGE_BITS;
 803    g_assert(start <= end);
 804
 805    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
 806                                page_entry_destroy);
 807    set->max = NULL;
 808    assert_no_pages_locked();
 809
 810 retry:
 811    g_tree_foreach(set->tree, page_entry_lock, NULL);
 812
 813    for (index = start; index <= end; index++) {
 814        TranslationBlock *tb;
 815        int n;
 816
 817        pd = page_find(index);
 818        if (pd == NULL) {
 819            continue;
 820        }
 821        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
 822            g_tree_foreach(set->tree, page_entry_unlock, NULL);
 823            goto retry;
 824        }
 825        assert_page_locked(pd);
 826        PAGE_FOR_EACH_TB(pd, tb, n) {
 827            if (page_trylock_add(set, tb->page_addr[0]) ||
 828                (tb->page_addr[1] != -1 &&
 829                 page_trylock_add(set, tb->page_addr[1]))) {
 830                /* drop all locks, and reacquire in order */
 831                g_tree_foreach(set->tree, page_entry_unlock, NULL);
 832                goto retry;
 833            }
 834        }
 835    }
 836    return set;
 837}
 838
 839void page_collection_unlock(struct page_collection *set)
 840{
 841    /* entries are unlocked and freed via page_entry_destroy */
 842    g_tree_destroy(set->tree);
 843    g_free(set);
 844}
 845
 846#endif /* !CONFIG_USER_ONLY */
 847
 848static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 849                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
 850{
 851    PageDesc *p1, *p2;
 852    tb_page_addr_t page1;
 853    tb_page_addr_t page2;
 854
 855    assert_memory_lock();
 856    g_assert(phys1 != -1);
 857
 858    page1 = phys1 >> TARGET_PAGE_BITS;
 859    page2 = phys2 >> TARGET_PAGE_BITS;
 860
 861    p1 = page_find_alloc(page1, alloc);
 862    if (ret_p1) {
 863        *ret_p1 = p1;
 864    }
 865    if (likely(phys2 == -1)) {
 866        page_lock(p1);
 867        return;
 868    } else if (page1 == page2) {
 869        page_lock(p1);
 870        if (ret_p2) {
 871            *ret_p2 = p1;
 872        }
 873        return;
 874    }
 875    p2 = page_find_alloc(page2, alloc);
 876    if (ret_p2) {
 877        *ret_p2 = p2;
 878    }
 879    if (page1 < page2) {
 880        page_lock(p1);
 881        page_lock(p2);
 882    } else {
 883        page_lock(p2);
 884        page_lock(p1);
 885    }
 886}
 887
 888static bool tb_cmp(const void *ap, const void *bp)
 889{
 890    const TranslationBlock *a = ap;
 891    const TranslationBlock *b = bp;
 892
 893    return a->pc == b->pc &&
 894        a->cs_base == b->cs_base &&
 895        a->flags == b->flags &&
 896        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
 897        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
 898        a->page_addr[0] == b->page_addr[0] &&
 899        a->page_addr[1] == b->page_addr[1];
 900}
 901
 902void tb_htable_init(void)
 903{
 904    unsigned int mode = QHT_MODE_AUTO_RESIZE;
 905
 906    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
 907}
 908
 909/* call with @p->lock held */
 910static inline void invalidate_page_bitmap(PageDesc *p)
 911{
 912    assert_page_locked(p);
 913#ifdef CONFIG_SOFTMMU
 914    g_free(p->code_bitmap);
 915    p->code_bitmap = NULL;
 916    p->code_write_count = 0;
 917#endif
 918}
 919
 920/* Set to NULL all the 'first_tb' fields in all PageDescs. */
 921static void page_flush_tb_1(int level, void **lp)
 922{
 923    int i;
 924
 925    if (*lp == NULL) {
 926        return;
 927    }
 928    if (level == 0) {
 929        PageDesc *pd = *lp;
 930
 931        for (i = 0; i < V_L2_SIZE; ++i) {
 932            page_lock(&pd[i]);
 933            pd[i].first_tb = (uintptr_t)NULL;
 934            invalidate_page_bitmap(pd + i);
 935            page_unlock(&pd[i]);
 936        }
 937    } else {
 938        void **pp = *lp;
 939
 940        for (i = 0; i < V_L2_SIZE; ++i) {
 941            page_flush_tb_1(level - 1, pp + i);
 942        }
 943    }
 944}
 945
 946static void page_flush_tb(void)
 947{
 948    int i, l1_sz = v_l1_size;
 949
 950    for (i = 0; i < l1_sz; i++) {
 951        page_flush_tb_1(v_l2_levels, l1_map + i);
 952    }
 953}
 954
 955static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
 956{
 957    const TranslationBlock *tb = value;
 958    size_t *size = data;
 959
 960    *size += tb->tc.size;
 961    return false;
 962}
 963
 964/* flush all the translation blocks */
 965static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 966{
 967    bool did_flush = false;
 968
 969    mmap_lock();
  970    /* If it has already been done on request of another CPU,
 971     * just retry.
 972     */
 973    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
 974        goto done;
 975    }
 976    did_flush = true;
 977
 978    if (DEBUG_TB_FLUSH_GATE) {
 979        size_t nb_tbs = tcg_nb_tbs();
 980        size_t host_size = 0;
 981
 982        tcg_tb_foreach(tb_host_size_iter, &host_size);
 983        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
 984               tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
 985    }
 986
 987    CPU_FOREACH(cpu) {
 988        cpu_tb_jmp_cache_clear(cpu);
 989    }
 990
 991    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 992    page_flush_tb();
 993
 994    tcg_region_reset_all();
 995    /* XXX: flush processor icache at this point if cache flush is
 996       expensive */
 997    qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
 998
 999done:
1000    mmap_unlock();
1001    if (did_flush) {
1002        qemu_plugin_flush_cb();
1003    }
1004}
1005
1006void tb_flush(CPUState *cpu)
1007{
1008    if (tcg_enabled()) {
1009        unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1010
1011        if (cpu_in_exclusive_context(cpu)) {
1012            do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1013        } else {
1014            async_safe_run_on_cpu(cpu, do_tb_flush,
1015                                  RUN_ON_CPU_HOST_INT(tb_flush_count));
1016        }
1017    }
1018}
1019
1020/*
1021 * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1022 * so in order to prevent bit rot we compile them unconditionally in user-mode,
1023 * and let the optimizer get rid of them by wrapping their user-only callers
1024 * with if (DEBUG_TB_CHECK_GATE).
1025 */
1026#ifdef CONFIG_USER_ONLY
1027
1028static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1029{
1030    TranslationBlock *tb = p;
1031    target_ulong addr = *(target_ulong *)userp;
1032
1033    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1034        printf("ERROR invalidate: address=" TARGET_FMT_lx
1035               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1036    }
1037}
1038
 1039/* verify that no remaining TB intersects the invalidated page
1040 *
1041 * Called with mmap_lock held.
1042 */
1043static void tb_invalidate_check(target_ulong address)
1044{
1045    address &= TARGET_PAGE_MASK;
1046    qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1047}
1048
1049static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1050{
1051    TranslationBlock *tb = p;
1052    int flags1, flags2;
1053
1054    flags1 = page_get_flags(tb->pc);
1055    flags2 = page_get_flags(tb->pc + tb->size - 1);
1056    if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1057        printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1058               (long)tb->pc, tb->size, flags1, flags2);
1059    }
1060}
1061
1062/* verify that all the pages have correct rights for code */
1063static void tb_page_check(void)
1064{
1065    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1066}
1067
1068#endif /* CONFIG_USER_ONLY */
1069
1070/*
1071 * user-mode: call with mmap_lock held
1072 * !user-mode: call with @pd->lock held
1073 */
1074static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1075{
1076    TranslationBlock *tb1;
1077    uintptr_t *pprev;
1078    unsigned int n1;
1079
1080    assert_page_locked(pd);
1081    pprev = &pd->first_tb;
1082    PAGE_FOR_EACH_TB(pd, tb1, n1) {
1083        if (tb1 == tb) {
1084            *pprev = tb1->page_next[n1];
1085            return;
1086        }
1087        pprev = &tb1->page_next[n1];
1088    }
1089    g_assert_not_reached();
1090}
1091
1092/* remove @orig from its @n_orig-th jump list */
1093static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1094{
1095    uintptr_t ptr, ptr_locked;
1096    TranslationBlock *dest;
1097    TranslationBlock *tb;
1098    uintptr_t *pprev;
1099    int n;
1100
1101    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1102    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1103    dest = (TranslationBlock *)(ptr & ~1);
1104    if (dest == NULL) {
1105        return;
1106    }
1107
1108    qemu_spin_lock(&dest->jmp_lock);
1109    /*
1110     * While acquiring the lock, the jump might have been removed if the
1111     * destination TB was invalidated; check again.
1112     */
1113    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
1114    if (ptr_locked != ptr) {
1115        qemu_spin_unlock(&dest->jmp_lock);
1116        /*
1117         * The only possibility is that the jump was unlinked via
 1118         * tb_jmp_unlink(dest). Seeing here another destination would be a bug,
1119         * because we set the LSB above.
1120         */
1121        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1122        return;
1123    }
1124    /*
1125     * We first acquired the lock, and since the destination pointer matches,
1126     * we know for sure that @orig is in the jmp list.
1127     */
1128    pprev = &dest->jmp_list_head;
1129    TB_FOR_EACH_JMP(dest, tb, n) {
1130        if (tb == orig && n == n_orig) {
1131            *pprev = tb->jmp_list_next[n];
1132            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1133            qemu_spin_unlock(&dest->jmp_lock);
1134            return;
1135        }
1136        pprev = &tb->jmp_list_next[n];
1137    }
1138    g_assert_not_reached();
1139}
1140
1141/* reset the jump entry 'n' of a TB so that it is not chained to
1142   another TB */
1143static inline void tb_reset_jump(TranslationBlock *tb, int n)
1144{
1145    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1146    tb_set_jmp_target(tb, n, addr);
1147}
1148
1149/* remove any jumps to the TB */
1150static inline void tb_jmp_unlink(TranslationBlock *dest)
1151{
1152    TranslationBlock *tb;
1153    int n;
1154
1155    qemu_spin_lock(&dest->jmp_lock);
1156
1157    TB_FOR_EACH_JMP(dest, tb, n) {
1158        tb_reset_jump(tb, n);
1159        qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1160        /* No need to clear the list entry; setting the dest ptr is enough */
1161    }
1162    dest->jmp_list_head = (uintptr_t)NULL;
1163
1164    qemu_spin_unlock(&dest->jmp_lock);
1165}
1166
1167/*
1168 * In user-mode, call with mmap_lock held.
1169 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1170 * locks held.
1171 */
1172static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1173{
1174    CPUState *cpu;
1175    PageDesc *p;
1176    uint32_t h;
1177    tb_page_addr_t phys_pc;
1178    uint32_t orig_cflags = tb_cflags(tb);
1179
1180    assert_memory_lock();
1181
1182    /* make sure no further incoming jumps will be chained to this TB */
1183    qemu_spin_lock(&tb->jmp_lock);
1184    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1185    qemu_spin_unlock(&tb->jmp_lock);
1186
1187    /* remove the TB from the hash list */
1188    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1189    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
1190                     tb->trace_vcpu_dstate);
1191    if (!qht_remove(&tb_ctx.htable, tb, h)) {
1192        return;
1193    }
1194
1195    /* remove the TB from the page list */
1196    if (rm_from_page_list) {
1197        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1198        tb_page_remove(p, tb);
1199        invalidate_page_bitmap(p);
1200        if (tb->page_addr[1] != -1) {
1201            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1202            tb_page_remove(p, tb);
1203            invalidate_page_bitmap(p);
1204        }
1205    }
1206
 1207    /* remove the TB from each CPU's tb_jmp_cache */
1208    h = tb_jmp_cache_hash_func(tb->pc);
1209    CPU_FOREACH(cpu) {
1210        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1211            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
1212        }
1213    }
1214
1215    /* suppress this TB from the two jump lists */
1216    tb_remove_from_jmp_list(tb, 0);
1217    tb_remove_from_jmp_list(tb, 1);
1218
1219    /* suppress any remaining jumps to this TB */
1220    tb_jmp_unlink(tb);
1221
1222    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
1223                tb_ctx.tb_phys_invalidate_count + 1);
1224}
1225
1226static void tb_phys_invalidate__locked(TranslationBlock *tb)
1227{
1228    qemu_thread_jit_write();
1229    do_tb_phys_invalidate(tb, true);
1230    qemu_thread_jit_execute();
1231}
1232
1233/* invalidate one TB
1234 *
1235 * Called with mmap_lock held in user-mode.
1236 */
1237void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1238{
1239    if (page_addr == -1 && tb->page_addr[0] != -1) {
1240        page_lock_tb(tb);
1241        do_tb_phys_invalidate(tb, true);
1242        page_unlock_tb(tb);
1243    } else {
1244        do_tb_phys_invalidate(tb, false);
1245    }
1246}
1247
1248#ifdef CONFIG_SOFTMMU
1249/* call with @p->lock held */
1250static void build_page_bitmap(PageDesc *p)
1251{
1252    int n, tb_start, tb_end;
1253    TranslationBlock *tb;
1254
1255    assert_page_locked(p);
1256    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1257
1258    PAGE_FOR_EACH_TB(p, tb, n) {
1259        /* NOTE: this is subtle as a TB may span two physical pages */
1260        if (n == 0) {
1261            /* NOTE: tb_end may be after the end of the page, but
1262               it is not a problem */
1263            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1264            tb_end = tb_start + tb->size;
1265            if (tb_end > TARGET_PAGE_SIZE) {
1266                tb_end = TARGET_PAGE_SIZE;
1267             }
1268        } else {
1269            tb_start = 0;
1270            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1271        }
1272        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1273    }
1274}
1275#endif
1276
 1277/* add the tb to the target page and protect it if necessary
1278 *
1279 * Called with mmap_lock held for user-mode emulation.
1280 * Called with @p->lock held in !user-mode.
1281 */
1282static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1283                               unsigned int n, tb_page_addr_t page_addr)
1284{
1285#ifndef CONFIG_USER_ONLY
1286    bool page_already_protected;
1287#endif
1288
1289    assert_page_locked(p);
1290
1291    tb->page_addr[n] = page_addr;
1292    tb->page_next[n] = p->first_tb;
1293#ifndef CONFIG_USER_ONLY
1294    page_already_protected = p->first_tb != (uintptr_t)NULL;
1295#endif
1296    p->first_tb = (uintptr_t)tb | n;
1297    invalidate_page_bitmap(p);
1298
1299#if defined(CONFIG_USER_ONLY)
1300    if (p->flags & PAGE_WRITE) {
1301        target_ulong addr;
1302        PageDesc *p2;
1303        int prot;
1304
1305        /* force the host page as non writable (writes will have a
1306           page fault + mprotect overhead) */
1307        page_addr &= qemu_host_page_mask;
1308        prot = 0;
1309        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1310            addr += TARGET_PAGE_SIZE) {
1311
1312            p2 = page_find(addr >> TARGET_PAGE_BITS);
1313            if (!p2) {
1314                continue;
1315            }
1316            prot |= p2->flags;
1317            p2->flags &= ~PAGE_WRITE;
1318          }
1319        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1320                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1321        if (DEBUG_TB_INVALIDATE_GATE) {
1322            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
1323        }
1324    }
1325#else
1326    /* if some code is already present, then the pages are already
1327       protected. So we handle the case where only the first TB is
1328       allocated in a physical page */
1329    if (!page_already_protected) {
1330        tlb_protect_code(page_addr);
1331    }
1332#endif
1333}
1334
1335/*
1336 * Add a new TB and link it to the physical page tables. phys_page2 is
1337 * (-1) to indicate that only one page contains the TB.
1338 *
1339 * Called with mmap_lock held for user-mode emulation.
1340 *
 1341 * Returns @tb, or a pointer to an existing TB that matches @tb.
1342 * Note that in !user-mode, another thread might have already added a TB
1343 * for the same block of guest code that @tb corresponds to. In that case,
1344 * the caller should discard the original @tb, and use instead the returned TB.
1345 */
1346static TranslationBlock *
1347tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1348             tb_page_addr_t phys_page2)
1349{
1350    PageDesc *p;
1351    PageDesc *p2 = NULL;
1352    void *existing_tb = NULL;
1353    uint32_t h;
1354
1355    assert_memory_lock();
1356    tcg_debug_assert(!(tb->cflags & CF_INVALID));
1357
1358    /*
 1359     * Add the TB to the page list, first acquiring the pages' locks.
1360     * We keep the locks held until after inserting the TB in the hash table,
1361     * so that if the insertion fails we know for sure that the TBs are still
1362     * in the page descriptors.
1363     * Note that inserting into the hash table first isn't an option, since
1364     * we can only insert TBs that are fully initialized.
1365     */
1366    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1367    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1368    if (p2) {
1369        tb_page_add(p2, tb, 1, phys_page2);
1370    } else {
1371        tb->page_addr[1] = -1;
1372    }
1373
1374    /* add in the hash table */
1375    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
1376                     tb->trace_vcpu_dstate);
1377    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1378
1379    /* remove TB from the page(s) if we couldn't insert it */
1380    if (unlikely(existing_tb)) {
1381        tb_page_remove(p, tb);
1382        invalidate_page_bitmap(p);
1383        if (p2) {
1384            tb_page_remove(p2, tb);
1385            invalidate_page_bitmap(p2);
1386        }
1387        tb = existing_tb;
1388    }
1389
1390    if (p2 && p2 != p) {
1391        page_unlock(p2);
1392    }
1393    page_unlock(p);
1394
1395#ifdef CONFIG_USER_ONLY
1396    if (DEBUG_TB_CHECK_GATE) {
1397        tb_page_check();
1398    }
1399#endif
1400    return tb;
1401}
1402
1403/* Called with mmap_lock held for user mode emulation.  */
1404TranslationBlock *tb_gen_code(CPUState *cpu,
1405                              target_ulong pc, target_ulong cs_base,
1406                              uint32_t flags, int cflags)
1407{
1408    CPUArchState *env = cpu->env_ptr;
1409    TranslationBlock *tb, *existing_tb;
1410    tb_page_addr_t phys_pc, phys_page2;
1411    target_ulong virt_page2;
1412    tcg_insn_unit *gen_code_buf;
1413    int gen_code_size, search_size, max_insns;
1414#ifdef CONFIG_PROFILER
1415    TCGProfile *prof = &tcg_ctx->prof;
1416    int64_t ti;
1417#endif
1418
1419    assert_memory_lock();
1420    qemu_thread_jit_write();
1421
1422    phys_pc = get_page_addr_code(env, pc);
1423
1424    if (phys_pc == -1) {
1425        /* Generate a one-shot TB with 1 insn in it */
1426        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
1427    }
1428
1429    max_insns = cflags & CF_COUNT_MASK;
1430    if (max_insns == 0) {
1431        max_insns = TCG_MAX_INSNS;
1432    }
1433    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
1434
1435 buffer_overflow:
1436    tb = tcg_tb_alloc(tcg_ctx);
1437    if (unlikely(!tb)) {
1438        /* flush must be done */
1439        tb_flush(cpu);
1440        mmap_unlock();
1441        /* Make the execution loop process the flush as soon as possible.  */
1442        cpu->exception_index = EXCP_INTERRUPT;
1443        cpu_loop_exit(cpu);
1444    }
1445
1446    gen_code_buf = tcg_ctx->code_gen_ptr;
1447    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
1448    tb->pc = pc;
1449    tb->cs_base = cs_base;
1450    tb->flags = flags;
1451    tb->cflags = cflags;
1452    tb->trace_vcpu_dstate = *cpu->trace_dstate;
1453    tcg_ctx->tb_cflags = cflags;
1454 tb_overflow:
1455
1456#ifdef CONFIG_PROFILER
1457    /* includes aborted translations because of exceptions */
1458    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1459    ti = profile_getclock();
1460#endif
1461
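    /*
     * Translation and code generation below may bail out early by
     * siglongjmp()-ing back to this point with a negative error code, which
     * is then handled by the same error path as a failing tcg_gen_code().
     */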
1462    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
1463    if (unlikely(gen_code_size != 0)) {
1464        goto error_return;
1465    }
1466
1467    tcg_func_start(tcg_ctx);
1468
1469    tcg_ctx->cpu = env_cpu(env);
1470    gen_intermediate_code(cpu, tb, max_insns);
1471    assert(tb->size != 0);
1472    tcg_ctx->cpu = NULL;
1473    max_insns = tb->icount;
1474
1475    trace_translate_block(tb, tb->pc, tb->tc.ptr);
1476
1477    /* generate machine code */
1478    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1479    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1480    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1481    if (TCG_TARGET_HAS_direct_jump) {
1482        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1483        tcg_ctx->tb_jmp_target_addr = NULL;
1484    } else {
1485        tcg_ctx->tb_jmp_insn_offset = NULL;
1486        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1487    }
1488
1489#ifdef CONFIG_PROFILER
1490    qatomic_set(&prof->tb_count, prof->tb_count + 1);
1491    qatomic_set(&prof->interm_time,
1492                prof->interm_time + profile_getclock() - ti);
1493    ti = profile_getclock();
1494#endif
1495
1496    gen_code_size = tcg_gen_code(tcg_ctx, tb);
1497    if (unlikely(gen_code_size < 0)) {
1498 error_return:
1499        switch (gen_code_size) {
1500        case -1:
1501            /*
1502             * Overflow of code_gen_buffer, or the current slice of it.
1503             *
1504             * TODO: We don't need to re-do gen_intermediate_code, nor
1505             * should we re-do the tcg optimization currently hidden
1506             * inside tcg_gen_code.  All that should be required is to
1507             * flush the TBs, allocate a new TB, re-initialize it per
1508             * above, and re-do the actual code generation.
1509             */
1510            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1511                          "Restarting code generation for "
1512                          "code_gen_buffer overflow\n");
1513            goto buffer_overflow;
1514
1515        case -2:
1516            /*
1517             * The code generated for the TranslationBlock is too large.
1518             * The maximum size allowed by the unwind info is 64k.
1519             * There may be stricter constraints from relocations
1520             * in the tcg backend.
1521             *
1522             * Try again with half as many insns as we attempted this time.
1523             * If a single insn overflows, there's a bug somewhere...
1524             */
1525            assert(max_insns > 1);
1526            max_insns /= 2;
1527            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1528                          "Restarting code generation with "
1529                          "smaller translation block (max %d insns)\n",
1530                          max_insns);
1531            goto tb_overflow;
1532
1533        default:
1534            g_assert_not_reached();
1535        }
1536    }
1537    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1538    if (unlikely(search_size < 0)) {
1539        goto buffer_overflow;
1540    }
1541    tb->tc.size = gen_code_size;
1542
1543#ifdef CONFIG_PROFILER
1544    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1545    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1546    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1547    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1548#endif
1549
1550#ifdef DEBUG_DISAS
1551    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1552        qemu_log_in_addr_range(tb->pc)) {
1553        FILE *logfile = qemu_log_lock();
1554        int code_size, data_size;
1555        const tcg_target_ulong *rx_data_gen_ptr;
1556        size_t chunk_start;
1557        int insn = 0;
1558
1559        if (tcg_ctx->data_gen_ptr) {
1560            rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
1561            code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
1562            data_size = gen_code_size - code_size;
1563        } else {
1564            rx_data_gen_ptr = 0;
1565            code_size = gen_code_size;
1566            data_size = 0;
1567        }
1568
1569        /* Dump header and the first instruction */
1570        qemu_log("OUT: [size=%d]\n", gen_code_size);
1571        qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
1572                 tcg_ctx->gen_insn_data[insn][0]);
1573        chunk_start = tcg_ctx->gen_insn_end_off[insn];
1574        log_disas(tb->tc.ptr, chunk_start);
1575
1576        /*
1577         * Dump each instruction chunk, wrapping up empty chunks into
1578         * the next instruction. The whole array is offset so the
1579         * first entry is the beginning of the 2nd instruction.
1580         */
1581        while (insn < tb->icount) {
1582            size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
1583            if (chunk_end > chunk_start) {
1584                qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
1585                         tcg_ctx->gen_insn_data[insn][0]);
1586                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
1587                chunk_start = chunk_end;
1588            }
1589            insn++;
1590        }
1591
1592        if (chunk_start < code_size) {
1593            qemu_log("  -- tb slow paths + alignment\n");
1594            log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
1595        }
1596
1597        /* Finally dump any data we may have after the block */
1598        if (data_size) {
1599            int i;
1600            qemu_log("  data: [size=%d]\n", data_size);
1601            for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
1602                if (sizeof(tcg_target_ulong) == 8) {
1603                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
1604                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1605                } else if (sizeof(tcg_target_ulong) == 4) {
1606                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
1607                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1608                } else {
1609                    qemu_build_not_reached();
1610                }
1611            }
1612        }
1613        qemu_log("\n");
1614        qemu_log_flush();
1615        qemu_log_unlock(logfile);
1616    }
1617#endif
1618
1619    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
1620        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1621                 CODE_GEN_ALIGN));
1622
1623    /* init jump list */
1624    qemu_spin_init(&tb->jmp_lock);
1625    tb->jmp_list_head = (uintptr_t)NULL;
1626    tb->jmp_list_next[0] = (uintptr_t)NULL;
1627    tb->jmp_list_next[1] = (uintptr_t)NULL;
1628    tb->jmp_dest[0] = (uintptr_t)NULL;
1629    tb->jmp_dest[1] = (uintptr_t)NULL;
1630
1631    /* init original jump addresses which have been set during tcg_gen_code() */
1632    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1633        tb_reset_jump(tb, 0);
1634    }
1635    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1636        tb_reset_jump(tb, 1);
1637    }
1638
1639    /*
1640     * If the TB is not associated with a physical RAM page then
1641     * it must be a temporary one-insn TB, and we have nothing to do
1642     * except fill in the page_addr[] fields. Return early before
1643     * attempting to link to other TBs or add to the lookup table.
1644     */
1645    if (phys_pc == -1) {
1646        tb->page_addr[0] = tb->page_addr[1] = -1;
1647        return tb;
1648    }
1649
1650    /*
1651     * Insert TB into the corresponding region tree before publishing it
 1652     * through QHT. Otherwise, a rewind happening inside the TB might fail
 1653     * to look itself up using the host PC.
1654     */
1655    tcg_tb_insert(tb);
1656
1657    /* check next page if needed */
1658    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1659    phys_page2 = -1;
1660    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1661        phys_page2 = get_page_addr_code(env, virt_page2);
1662    }
1663    /*
1664     * No explicit memory barrier is required -- tb_link_page() makes the
1665     * TB visible in a consistent state.
1666     */
1667    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
1668    /* if the TB already exists, discard what we just translated */
1669    if (unlikely(existing_tb != tb)) {
1670        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1671
1672        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1673        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1674        tcg_tb_remove(tb);
1675        return existing_tb;
1676    }
1677    return tb;
1678}
1679
1680/*
1681 * @p must be non-NULL.
1682 * user-mode: call with mmap_lock held.
1683 * !user-mode: call with all @pages locked.
1684 */
1685static void
1686tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1687                                      PageDesc *p, tb_page_addr_t start,
1688                                      tb_page_addr_t end,
1689                                      uintptr_t retaddr)
1690{
1691    TranslationBlock *tb;
1692    tb_page_addr_t tb_start, tb_end;
1693    int n;
1694#ifdef TARGET_HAS_PRECISE_SMC
1695    CPUState *cpu = current_cpu;
1696    CPUArchState *env = NULL;
1697    bool current_tb_not_found = retaddr != 0;
1698    bool current_tb_modified = false;
1699    TranslationBlock *current_tb = NULL;
1700    target_ulong current_pc = 0;
1701    target_ulong current_cs_base = 0;
1702    uint32_t current_flags = 0;
1703#endif /* TARGET_HAS_PRECISE_SMC */
1704
1705    assert_page_locked(p);
1706
1707#if defined(TARGET_HAS_PRECISE_SMC)
1708    if (cpu != NULL) {
1709        env = cpu->env_ptr;
1710    }
1711#endif
1712
1713    /* we remove all the TBs in the range [start, end[ */
1714    /* XXX: see if in some cases it could be faster to invalidate all
1715       the code */
1716    PAGE_FOR_EACH_TB(p, tb, n) {
1717        assert_page_locked(p);
1718        /* NOTE: this is subtle as a TB may span two physical pages */
1719        if (n == 0) {
1720            /* NOTE: tb_end may be after the end of the page, but
1721               it is not a problem */
1722            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1723            tb_end = tb_start + tb->size;
1724        } else {
1725            tb_start = tb->page_addr[1];
1726            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1727        }
1728        if (!(tb_end <= start || tb_start >= end)) {
1729#ifdef TARGET_HAS_PRECISE_SMC
1730            if (current_tb_not_found) {
1731                current_tb_not_found = false;
1732                /* now we have a real cpu fault */
1733                current_tb = tcg_tb_lookup(retaddr);
1734            }
1735            if (current_tb == tb &&
1736                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1737                /*
1738                 * If we are modifying the current TB, we must stop
1739                 * its execution. We could be more precise by checking
1740                 * that the modification is after the current PC, but it
1741                 * would require a specialized function to partially
1742                 * restore the CPU state.
1743                 */
1744                current_tb_modified = true;
1745                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
1746                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1747                                     &current_flags);
1748            }
1749#endif /* TARGET_HAS_PRECISE_SMC */
1750            tb_phys_invalidate__locked(tb);
1751        }
1752    }
1753#if !defined(CONFIG_USER_ONLY)
1754    /* if no code remains in the page, no need to keep using slow writes */
1755    if (!p->first_tb) {
1756        invalidate_page_bitmap(p);
1757        tlb_unprotect_code(start);
1758    }
1759#endif
1760#ifdef TARGET_HAS_PRECISE_SMC
1761    if (current_tb_modified) {
1762        page_collection_unlock(pages);
1763        /* Force execution of one insn next time.  */
1764        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
1765        mmap_unlock();
1766        cpu_loop_exit_noexc(cpu);
1767    }
1768#endif
1769}
1770
1771/*
1772 * Invalidate all TBs which intersect with the target physical address range
1773 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1774 * The range is invalidated as an ordinary (non-CPU) write: the retaddr
1775 * passed to the locked helper is 0, so no precise-SMC handling of the
1776 * currently executing TB is attempted here.
1777 *
1778 * Called with mmap_lock held for user-mode emulation
1779 */
1780void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1781{
1782    struct page_collection *pages;
1783    PageDesc *p;
1784
1785    assert_memory_lock();
1786
1787    p = page_find(start >> TARGET_PAGE_BITS);
1788    if (p == NULL) {
1789        return;
1790    }
1791    pages = page_collection_lock(start, end);
1792    tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1793    page_collection_unlock(pages);
1794}
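
/*
 * Illustrative only: because start and end must stay within one guest page,
 * a caller whose range might cross a page boundary should use
 * tb_invalidate_phys_range() below instead, e.g. (hypothetical caller):
 *
 *     tb_invalidate_phys_range(addr, addr + len);
 */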
1795
1796/*
1797 * Invalidate all TBs which intersect with the target physical address range
1798 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1799 * The range is split per page and each page's TBs are invalidated in
1800 * turn. The retaddr passed to the locked helper is 0, so no precise-SMC
1801 * handling of the currently executing TB is attempted here.
1802 *
1803 * Called with mmap_lock held for user-mode emulation.
1804 */
1805#ifdef CONFIG_SOFTMMU
1806void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
1807#else
1808void tb_invalidate_phys_range(target_ulong start, target_ulong end)
1809#endif
1810{
1811    struct page_collection *pages;
1812    tb_page_addr_t next;
1813
1814    assert_memory_lock();
1815
1816    pages = page_collection_lock(start, end);
1817    for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1818         start < end;
1819         start = next, next += TARGET_PAGE_SIZE) {
1820        PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1821        tb_page_addr_t bound = MIN(next, end);
1822
1823        if (pd == NULL) {
1824            continue;
1825        }
1826        tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1827    }
1828    page_collection_unlock(pages);
1829}
1830
1831#ifdef CONFIG_SOFTMMU
1832/* len must be <= 8 and start must be a multiple of len.
1833 * Called from the softmmu write slow path (cputlb.c) when code areas are
1834 * written to, with the iothread mutex not held.
1835 *
1836 * Call with all @pages in the range [@start, @start + len[ locked.
1837 */
1838void tb_invalidate_phys_page_fast(struct page_collection *pages,
1839                                  tb_page_addr_t start, int len,
1840                                  uintptr_t retaddr)
1841{
1842    PageDesc *p;
1843
1844    assert_memory_lock();
1845
1846    p = page_find(start >> TARGET_PAGE_BITS);
1847    if (!p) {
1848        return;
1849    }
1850
1851    assert_page_locked(p);
1852    if (!p->code_bitmap &&
1853        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
1854        build_page_bitmap(p);
1855    }
1856    if (p->code_bitmap) {
1857        unsigned int nr;
1858        unsigned long b;
1859
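        /*
         * The code bitmap has one bit per byte of the page; nr is the
         * offset of the write within the page.  Shift the containing word
         * so that bit 0 corresponds to 'start', then test the 'len' bits
         * covering the written bytes: if any is set, translated code
         * overlaps the write and must be invalidated.
         */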
1860        nr = start & ~TARGET_PAGE_MASK;
1861        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
1862        if (b & ((1 << len) - 1)) {
1863            goto do_invalidate;
1864        }
1865    } else {
1866    do_invalidate:
1867        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
1868                                              retaddr);
1869    }
1870}
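
/*
 * Illustrative sketch of the expected calling pattern (the real caller is
 * the not-dirty write slow path in cputlb.c); shown only as context:
 *
 *     struct page_collection *pages
 *         = page_collection_lock(ram_addr, ram_addr + size);
 *     tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
 *     page_collection_unlock(pages);
 */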
1871#else
1872/* Called with mmap_lock held. If pc is not 0 then it indicates the
1873 * host PC of the faulting store instruction that caused this invalidate.
1874 * Returns true if the caller needs to abort execution of the current
1875 * TB (because it was modified by this store and the guest CPU has
1876 * precise-SMC semantics).
1877 */
1878static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
1879{
1880    TranslationBlock *tb;
1881    PageDesc *p;
1882    int n;
1883#ifdef TARGET_HAS_PRECISE_SMC
1884    TranslationBlock *current_tb = NULL;
1885    CPUState *cpu = current_cpu;
1886    CPUArchState *env = NULL;
1887    int current_tb_modified = 0;
1888    target_ulong current_pc = 0;
1889    target_ulong current_cs_base = 0;
1890    uint32_t current_flags = 0;
1891#endif
1892
1893    assert_memory_lock();
1894
1895    addr &= TARGET_PAGE_MASK;
1896    p = page_find(addr >> TARGET_PAGE_BITS);
1897    if (!p) {
1898        return false;
1899    }
1900
1901#ifdef TARGET_HAS_PRECISE_SMC
1902    if (p->first_tb && pc != 0) {
1903        current_tb = tcg_tb_lookup(pc);
1904    }
1905    if (cpu != NULL) {
1906        env = cpu->env_ptr;
1907    }
1908#endif
1909    assert_page_locked(p);
1910    PAGE_FOR_EACH_TB(p, tb, n) {
1911#ifdef TARGET_HAS_PRECISE_SMC
1912        if (current_tb == tb &&
1913            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1914            /* If we are modifying the current TB, we must stop
1915               its execution. We could be more precise by checking
1916               that the modification is after the current PC, but it
1917               would require a specialized function to partially
1918               restore the CPU state */
1919
1920            current_tb_modified = 1;
1921            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
1922            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1923                                 &current_flags);
1924        }
1925#endif /* TARGET_HAS_PRECISE_SMC */
1926        tb_phys_invalidate(tb, addr);
1927    }
1928    p->first_tb = (uintptr_t)NULL;
1929#ifdef TARGET_HAS_PRECISE_SMC
1930    if (current_tb_modified) {
1931        /* Force execution of one insn next time.  */
1932        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
1933        return true;
1934    }
1935#endif
1936
1937    return false;
1938}
1939#endif
1940
1941/* user-mode: call with mmap_lock held */
1942void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1943{
1944    TranslationBlock *tb;
1945
1946    assert_memory_lock();
1947
1948    tb = tcg_tb_lookup(retaddr);
1949    if (tb) {
1950        /* We can use retranslation to find the PC.  */
1951        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1952        tb_phys_invalidate(tb, -1);
1953    } else {
1954        /* The exception probably happened in a helper.  The CPU state should
1955           have been saved before calling it. Fetch the PC from there.  */
1956        CPUArchState *env = cpu->env_ptr;
1957        target_ulong pc, cs_base;
1958        tb_page_addr_t addr;
1959        uint32_t flags;
1960
1961        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1962        addr = get_page_addr_code(env, pc);
1963        if (addr != -1) {
1964            tb_invalidate_phys_range(addr, addr + 1);
1965        }
1966    }
1967}
1968
1969#ifndef CONFIG_USER_ONLY
1970/*
1971 * In deterministic execution mode, instructions doing device I/Os
1972 * must be at the end of the TB.
1973 *
1974 * Called from the softmmu I/O slow path (cputlb.c), iothread mutex not held.
1975 */
1976void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1977{
1978    TranslationBlock *tb;
1979    CPUClass *cc;
1980    uint32_t n;
1981
1982    tb = tcg_tb_lookup(retaddr);
1983    if (!tb) {
1984        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1985                  (void *)retaddr);
1986    }
1987    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1988
1989    /*
1990     * Some guests must re-execute the branch when re-executing a delay
1991     * slot instruction.  When this is the case, adjust icount and N
1992     * to account for the re-execution of the branch.
1993     */
1994    n = 1;
1995    cc = CPU_GET_CLASS(cpu);
1996    if (cc->tcg_ops->io_recompile_replay_branch &&
1997        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1998        cpu_neg(cpu)->icount_decr.u16.low++;
1999        n = 2;
2000    }
2001
2002    /*
2003     * Exit the loop and potentially generate a new TB executing just
2004     * the I/O insns. We also limit plugin instrumentation to memory
2005     * operations only (which are instrumented after the access completes)
2006     * so we don't double instrument the instruction.
2007     */
2008    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
2009
2010    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
2011                           "cpu_io_recompile: rewound execution of TB to "
2012                           TARGET_FMT_lx "\n", tb->pc);
2013
2014    cpu_loop_exit_noexc(cpu);
2015}
2016
2017static void print_qht_statistics(struct qht_stats hst)
2018{
2019    uint32_t hgram_opts;
2020    size_t hgram_bins;
2021    char *hgram;
2022
2023    if (!hst.head_buckets) {
2024        return;
2025    }
2026    qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2027                hst.used_head_buckets, hst.head_buckets,
2028                (double)hst.used_head_buckets / hst.head_buckets * 100);
2029
2030    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2031    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2032    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2033        hgram_opts |= QDIST_PR_NODECIMAL;
2034    }
2035    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2036    qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2037                qdist_avg(&hst.occupancy) * 100, hgram);
2038    g_free(hgram);
2039
2040    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2041    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2042    if (hgram_bins > 10) {
2043        hgram_bins = 10;
2044    } else {
2045        hgram_bins = 0;
2046        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2047    }
2048    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2049    qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2050                qdist_avg(&hst.chain), hgram);
2051    g_free(hgram);
2052}
2053
2054struct tb_tree_stats {
2055    size_t nb_tbs;
2056    size_t host_size;
2057    size_t target_size;
2058    size_t max_target_size;
2059    size_t direct_jmp_count;
2060    size_t direct_jmp2_count;
2061    size_t cross_page;
2062};
2063
2064static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2065{
2066    const TranslationBlock *tb = value;
2067    struct tb_tree_stats *tst = data;
2068
2069    tst->nb_tbs++;
2070    tst->host_size += tb->tc.size;
2071    tst->target_size += tb->size;
2072    if (tb->size > tst->max_target_size) {
2073        tst->max_target_size = tb->size;
2074    }
2075    if (tb->page_addr[1] != -1) {
2076        tst->cross_page++;
2077    }
2078    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2079        tst->direct_jmp_count++;
2080        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2081            tst->direct_jmp2_count++;
2082        }
2083    }
2084    return false;
2085}
2086
2087void dump_exec_info(void)
2088{
2089    struct tb_tree_stats tst = {};
2090    struct qht_stats hst;
2091    size_t nb_tbs, flush_full, flush_part, flush_elide;
2092
2093    tcg_tb_foreach(tb_tree_stats_iter, &tst);
2094    nb_tbs = tst.nb_tbs;
2095    /* XXX: avoid using doubles ? */
2096    qemu_printf("Translation buffer state:\n");
2097    /*
2098     * Report total code size including the padding and TB structs;
2099     * otherwise users might think "-accel tcg,tb-size" is not honoured.
2100     * For avg host size we use the precise numbers from tb_tree_stats though.
2101     */
2102    qemu_printf("gen code size       %zu/%zu\n",
2103                tcg_code_size(), tcg_code_capacity());
2104    qemu_printf("TB count            %zu\n", nb_tbs);
2105    qemu_printf("TB avg target size  %zu max=%zu bytes\n",
2106                nb_tbs ? tst.target_size / nb_tbs : 0,
2107                tst.max_target_size);
2108    qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2109                nb_tbs ? tst.host_size / nb_tbs : 0,
2110                tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2111    qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
2112                nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2113    qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2114                tst.direct_jmp_count,
2115                nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2116                tst.direct_jmp2_count,
2117                nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2118
2119    qht_statistics_init(&tb_ctx.htable, &hst);
2120    print_qht_statistics(hst);
2121    qht_statistics_destroy(&hst);
2122
2123    qemu_printf("\nStatistics:\n");
2124    qemu_printf("TB flush count      %u\n",
2125                qatomic_read(&tb_ctx.tb_flush_count));
2126    qemu_printf("TB invalidate count %u\n",
2127                qatomic_read(&tb_ctx.tb_phys_invalidate_count));
2128
2129    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2130    qemu_printf("TLB full flushes    %zu\n", flush_full);
2131    qemu_printf("TLB partial flushes %zu\n", flush_part);
2132    qemu_printf("TLB elided flushes  %zu\n", flush_elide);
2133    tcg_dump_info();
2134}
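
/*
 * Context only: this is normally reached via the HMP monitor command
 * "info jit"; the monitor glue lives outside this file.
 */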
2135
2136void dump_opcount_info(void)
2137{
2138    tcg_dump_op_count();
2139}
2140
2141#else /* CONFIG_USER_ONLY */
2142
2143void cpu_interrupt(CPUState *cpu, int mask)
2144{
2145    g_assert(qemu_mutex_iothread_locked());
2146    cpu->interrupt_request |= mask;
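    /*
     * Kick the vCPU: setting the high half of icount_decr makes both the
     * generated code and the execution loop notice the pending request and
     * leave the current TB at the next check.
     */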
2147    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2148}
2149
2150/*
2151 * Walks guest process memory "regions" one by one
2152 * and calls callback function 'fn' for each region.
2153 */
2154struct walk_memory_regions_data {
2155    walk_memory_regions_fn fn;
2156    void *priv;
2157    target_ulong start;
2158    int prot;
2159};
2160
2161static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2162                                   target_ulong end, int new_prot)
2163{
2164    if (data->start != -1u) {
2165        int rc = data->fn(data->priv, data->start, end, data->prot);
2166        if (rc != 0) {
2167            return rc;
2168        }
2169    }
2170
2171    data->start = (new_prot ? end : -1u);
2172    data->prot = new_prot;
2173
2174    return 0;
2175}
2176
2177static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2178                                 target_ulong base, int level, void **lp)
2179{
2180    target_ulong pa;
2181    int i, rc;
2182
2183    if (*lp == NULL) {
2184        return walk_memory_regions_end(data, base, 0);
2185    }
2186
2187    if (level == 0) {
2188        PageDesc *pd = *lp;
2189
2190        for (i = 0; i < V_L2_SIZE; ++i) {
2191            int prot = pd[i].flags;
2192
2193            pa = base | (i << TARGET_PAGE_BITS);
2194            if (prot != data->prot) {
2195                rc = walk_memory_regions_end(data, pa, prot);
2196                if (rc != 0) {
2197                    return rc;
2198                }
2199            }
2200        }
2201    } else {
2202        void **pp = *lp;
2203
2204        for (i = 0; i < V_L2_SIZE; ++i) {
2205            pa = base | ((target_ulong)i <<
2206                (TARGET_PAGE_BITS + V_L2_BITS * level));
2207            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2208            if (rc != 0) {
2209                return rc;
2210            }
2211        }
2212    }
2213
2214    return 0;
2215}
2216
2217int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2218{
2219    struct walk_memory_regions_data data;
2220    uintptr_t i, l1_sz = v_l1_size;
2221
2222    data.fn = fn;
2223    data.priv = priv;
2224    data.start = -1u;
2225    data.prot = 0;
2226
2227    for (i = 0; i < l1_sz; i++) {
2228        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2229        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2230        if (rc != 0) {
2231            return rc;
2232        }
2233    }
2234
2235    return walk_memory_regions_end(&data, 0, 0);
2236}
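
/*
 * dump_region() below is one example of a walk_memory_regions_fn callback:
 * each invocation receives one maximal run of consecutive pages that share
 * the same protection flags.
 */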
2237
2238static int dump_region(void *priv, target_ulong start,
2239    target_ulong end, unsigned long prot)
2240{
2241    FILE *f = (FILE *)priv;
2242
2243    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2244        " "TARGET_FMT_lx" %c%c%c\n",
2245        start, end, end - start,
2246        ((prot & PAGE_READ) ? 'r' : '-'),
2247        ((prot & PAGE_WRITE) ? 'w' : '-'),
2248        ((prot & PAGE_EXEC) ? 'x' : '-'));
2249
2250    return 0;
2251}
2252
2253/* dump memory mappings */
2254void page_dump(FILE *f)
2255{
2256    const int length = sizeof(target_ulong) * 2;
2257    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2258            length, "start", length, "end", length, "size", "prot");
2259    walk_memory_regions(f, dump_region);
2260}
2261
2262int page_get_flags(target_ulong address)
2263{
2264    PageDesc *p;
2265
2266    p = page_find(address >> TARGET_PAGE_BITS);
2267    if (!p) {
2268        return 0;
2269    }
2270    return p->flags;
2271}
2272
2273/* Modify the flags of a page and invalidate the code if necessary.
2274   The flag PAGE_WRITE_ORG is set automatically depending
2275   on PAGE_WRITE.  The mmap_lock should already be held.  */
2276void page_set_flags(target_ulong start, target_ulong end, int flags)
2277{
2278    target_ulong addr, len;
2279    bool reset_target_data;
2280
2281    /* This function should never be called with addresses outside the
2282       guest address space.  If this assert fires, it probably indicates
2283       a missing call to h2g_valid.  */
2284    assert(end - 1 <= GUEST_ADDR_MAX);
2285    assert(start < end);
2286    /* Only set PAGE_ANON with new mappings. */
2287    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2288    assert_memory_lock();
2289
2290    start = start & TARGET_PAGE_MASK;
2291    end = TARGET_PAGE_ALIGN(end);
2292
2293    if (flags & PAGE_WRITE) {
2294        flags |= PAGE_WRITE_ORG;
2295    }
2296    reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2297    flags &= ~PAGE_RESET;
2298
2299    for (addr = start, len = end - start;
2300         len != 0;
2301         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2302        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2303
2304        /* If a page that contains code is being made writable again,
2305           invalidate the translated code inside it.  */
2306        if (!(p->flags & PAGE_WRITE) &&
2307            (flags & PAGE_WRITE) &&
2308            p->first_tb) {
2309            tb_invalidate_phys_page(addr, 0);
2310        }
2311        if (reset_target_data) {
2312            g_free(p->target_data);
2313            p->target_data = NULL;
2314            p->flags = flags;
2315        } else {
2316            /* Using mprotect on a page does not change MAP_ANON. */
2317            p->flags = (p->flags & PAGE_ANON) | flags;
2318        }
2319    }
2320}
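
/*
 * Illustrative sketch of a typical caller, the user-mode mmap emulation
 * (simplified; the real code lives in linux-user/mmap.c):
 *
 *     int page_flags = prot | PAGE_VALID | PAGE_RESET;
 *     if (flags & MAP_ANONYMOUS) {
 *         page_flags |= PAGE_ANON;
 *     }
 *     page_set_flags(start, start + len, page_flags);
 */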
2321
2322void *page_get_target_data(target_ulong address)
2323{
2324    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2325    return p ? p->target_data : NULL;
2326}
2327
2328void *page_alloc_target_data(target_ulong address, size_t size)
2329{
2330    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2331    void *ret = NULL;
2332
2333    if (p->flags & PAGE_VALID) {
2334        ret = p->target_data;
2335        if (!ret) {
2336            p->target_data = ret = g_malloc0(size);
2337        }
2338    }
2339    return ret;
2340}
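
/*
 * Context only: one user of the per-page target_data is the Arm MTE
 * emulation in user mode, which lazily allocates tag storage for a page
 * the first time a tagged access touches it (see target/arm/).
 */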
2341
2342int page_check_range(target_ulong start, target_ulong len, int flags)
2343{
2344    PageDesc *p;
2345    target_ulong end;
2346    target_ulong addr;
2347
2348    /* This function should never be called with addresses outside the
2349       guest address space.  If this assert fires, it probably indicates
2350       a missing call to h2g_valid.  */
2351    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2352        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2353    }
2354
2355    if (len == 0) {
2356        return 0;
2357    }
2358    if (start + len - 1 < start) {
2359        /* We've wrapped around.  */
2360        return -1;
2361    }
2362
2363    /* must do this before we lose bits in the next step */
2364    end = TARGET_PAGE_ALIGN(start + len);
2365    start = start & TARGET_PAGE_MASK;
2366
2367    for (addr = start, len = end - start;
2368         len != 0;
2369         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2370        p = page_find(addr >> TARGET_PAGE_BITS);
2371        if (!p) {
2372            return -1;
2373        }
2374        if (!(p->flags & PAGE_VALID)) {
2375            return -1;
2376        }
2377
2378        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2379            return -1;
2380        }
2381        if (flags & PAGE_WRITE) {
2382            if (!(p->flags & PAGE_WRITE_ORG)) {
2383                return -1;
2384            }
2385            /* unprotect the page if it was put read-only because it
2386               contains translated code */
2387            if (!(p->flags & PAGE_WRITE)) {
2388                if (!page_unprotect(addr, 0)) {
2389                    return -1;
2390                }
2391            }
2392        }
2393    }
2394    return 0;
2395}
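
/*
 * Illustrative only: syscall emulation typically validates guest buffers
 * with this before touching them, along the lines of:
 *
 *     if (page_check_range(guest_addr, len, PAGE_READ) < 0) {
 *         return -TARGET_EFAULT;
 *     }
 */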
2396
2397/* called from signal handler: invalidate the code and unprotect the
2398 * page. Return 0 if the fault was not handled, 1 if it was handled,
2399 * and 2 if it was handled but the caller must cause the TB to be
2400 * immediately exited. (We can only return 2 if the 'pc' argument is
2401 * non-zero.)
2402 */
2403int page_unprotect(target_ulong address, uintptr_t pc)
2404{
2405    unsigned int prot;
2406    bool current_tb_invalidated;
2407    PageDesc *p;
2408    target_ulong host_start, host_end, addr;
2409
2410    /* Technically this isn't safe inside a signal handler.  However we
2411       know this only ever happens in a synchronous SEGV handler, so in
2412       practice it seems to be ok.  */
2413    mmap_lock();
2414
2415    p = page_find(address >> TARGET_PAGE_BITS);
2416    if (!p) {
2417        mmap_unlock();
2418        return 0;
2419    }
2420
2421    /* if the page was really writable, then we change its
2422       protection back to writable */
2423    if (p->flags & PAGE_WRITE_ORG) {
2424        current_tb_invalidated = false;
2425        if (p->flags & PAGE_WRITE) {
2426            /* If the page is actually marked WRITE then assume this is because
2427             * this thread raced with another one which got here first and
2428             * set the page to PAGE_WRITE and did the TB invalidate for us.
2429             */
2430#ifdef TARGET_HAS_PRECISE_SMC
2431            TranslationBlock *current_tb = tcg_tb_lookup(pc);
2432            if (current_tb) {
2433                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2434            }
2435#endif
2436        } else {
2437            host_start = address & qemu_host_page_mask;
2438            host_end = host_start + qemu_host_page_size;
2439
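            /*
             * The host page may be larger than the target page, so every
             * target page it contains must be made writable again and have
             * its translated code invalidated; the flags are merged so the
             * final mprotect() covers the whole host page.
             */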
2440            prot = 0;
2441            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2442                p = page_find(addr >> TARGET_PAGE_BITS);
2443                p->flags |= PAGE_WRITE;
2444                prot |= p->flags;
2445
2446                /* and since the content will be modified, we must invalidate
2447                   the corresponding translated code. */
2448                current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2449#ifdef CONFIG_USER_ONLY
2450                if (DEBUG_TB_CHECK_GATE) {
2451                    tb_invalidate_check(addr);
2452                }
2453#endif
2454            }
2455            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2456                     prot & PAGE_BITS);
2457        }
2458        mmap_unlock();
2459        /* If current TB was invalidated return to main loop */
2460        return current_tb_invalidated ? 2 : 1;
2461    }
2462    mmap_unlock();
2463    return 0;
2464}
2465#endif /* CONFIG_USER_ONLY */
2466
2467/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2468void tcg_flush_softmmu_tlb(CPUState *cs)
2469{
2470#ifdef CONFIG_SOFTMMU
2471    tlb_flush(cs);
2472#endif
2473}
2474