qemu/accel/tcg/translate-all.c
   1/*
   2 *  Host code generation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu-common.h"
  22
  23#define NO_CPU_IO_DEFS
  24#include "trace.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg/tcg.h"
  28#if defined(CONFIG_USER_ONLY)
  29#include "qemu.h"
  30#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  31#include <sys/param.h>
  32#if __FreeBSD_version >= 700104
  33#define HAVE_KINFO_GETVMMAP
  34#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  35#include <sys/proc.h>
  36#include <machine/profile.h>
  37#define _KERNEL
  38#include <sys/user.h>
  39#undef _KERNEL
  40#undef sigqueue
  41#include <libutil.h>
  42#endif
  43#endif
  44#else
  45#include "exec/ram_addr.h"
  46#endif
  47
  48#include "exec/cputlb.h"
  49#include "exec/translate-all.h"
  50#include "qemu/bitmap.h"
  51#include "qemu/qemu-print.h"
  52#include "qemu/timer.h"
  53#include "qemu/main-loop.h"
  54#include "qemu/cacheinfo.h"
  55#include "exec/log.h"
  56#include "sysemu/cpus.h"
  57#include "sysemu/cpu-timers.h"
  58#include "sysemu/tcg.h"
  59#include "qapi/error.h"
  60#include "hw/core/tcg-cpu-ops.h"
  61#include "tb-hash.h"
  62#include "tb-context.h"
  63#include "internal.h"
  64
  65/* #define DEBUG_TB_INVALIDATE */
  66/* #define DEBUG_TB_FLUSH */
  67/* make various TB consistency checks */
  68/* #define DEBUG_TB_CHECK */
  69
  70#ifdef DEBUG_TB_INVALIDATE
  71#define DEBUG_TB_INVALIDATE_GATE 1
  72#else
  73#define DEBUG_TB_INVALIDATE_GATE 0
  74#endif
  75
  76#ifdef DEBUG_TB_FLUSH
  77#define DEBUG_TB_FLUSH_GATE 1
  78#else
  79#define DEBUG_TB_FLUSH_GATE 0
  80#endif
  81
  82#if !defined(CONFIG_USER_ONLY)
  83/* TB consistency checks only implemented for usermode emulation.  */
  84#undef DEBUG_TB_CHECK
  85#endif
  86
  87#ifdef DEBUG_TB_CHECK
  88#define DEBUG_TB_CHECK_GATE 1
  89#else
  90#define DEBUG_TB_CHECK_GATE 0
  91#endif
  92
  93/* Access to the various translation structures needs to be serialised via locks
  94 * for consistency.
  95 * In user-mode emulation, access to the memory-related structures is protected
  96 * by mmap_lock.
  97 * In !user-mode we use per-page locks.
  98 */
  99#ifdef CONFIG_SOFTMMU
 100#define assert_memory_lock()
 101#else
 102#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
 103#endif
 104
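/*
 * Threshold of code-write faults on a page after which
 * tb_invalidate_phys_page_fast() builds the page's code_bitmap, so that
 * later writes which do not touch translated code can avoid invalidating TBs.
 */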
 105#define SMC_BITMAP_USE_THRESHOLD 10
 106
 107typedef struct PageDesc {
 108    /* list of TBs intersecting this ram page */
 109    uintptr_t first_tb;
 110#ifdef CONFIG_SOFTMMU
 111    /* in order to optimize self-modifying code, we count the number
 112       of writes to a given page and, past a threshold, use a bitmap */
 113    unsigned long *code_bitmap;
 114    unsigned int code_write_count;
 115#else
 116    unsigned long flags;
 117    void *target_data;
 118#endif
 119#ifndef CONFIG_USER_ONLY
 120    QemuSpin lock;
 121#endif
 122} PageDesc;
 123
 124/**
 125 * struct page_entry - page descriptor entry
 126 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 127 * @index:  page index of the page
 128 * @locked: whether the page is locked
 129 *
 130 * This struct helps us keep track of the locked state of a page, without
 131 * bloating &struct PageDesc.
 132 *
 133 * A page lock protects accesses to all fields of &struct PageDesc.
 134 *
 135 * See also: &struct page_collection.
 136 */
 137struct page_entry {
 138    PageDesc *pd;
 139    tb_page_addr_t index;
 140    bool locked;
 141};
 142
 143/**
 144 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 145 * @tree:   Binary search tree (BST) of the pages, with key == page index
 146 * @max:    Pointer to the page in @tree with the highest page index
 147 *
 148 * To avoid deadlock we lock pages in ascending order of page index.
 149 * When operating on a set of pages, we need to keep track of them so that
 150 * we can lock them in order and also unlock them later. For this we collect
 151 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 152 * @tree implementation we use does not provide an O(1) operation to obtain the
 153 * highest-ranked element, we use @max to keep track of the inserted page
 154 * with the highest index. This is valuable because if a page is not in
 155 * the tree and its index is higher than @max's, then we can lock it
 156 * without breaking the locking order rule.
 157 *
 158 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 159 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 160 *
 161 * See also: page_collection_lock().
 162 */
 163struct page_collection {
 164    GTree *tree;
 165    struct page_entry *max;
 166};
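/*
 * Typical usage, as in tb_invalidate_phys_range():
 *
 *     struct page_collection *pages = page_collection_lock(start, end);
 *     ... invalidate TBs with every relevant page already locked ...
 *     page_collection_unlock(pages);
 *
 * so that all pages touched are locked in ascending order of page index.
 */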
 167
 168/* list iterators for lists of tagged pointers in TranslationBlock */
 169#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
 170    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
 171         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
 172             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
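/*
 * Each link in these lists is a TranslationBlock pointer whose low bit is a
 * tag: it selects which of the TB's two 'field' slots (e.g. page_next[])
 * continues the chain, since a TB can be on the lists of up to two pages.
 * The iterator above keeps the tag in 'n' and masks it off with '& ~1'.
 */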
 173
 174#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
 175    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
 176
 177#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
 178    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 179
 180/*
 181 * In system mode we want L1_MAP to be based on ram offsets,
 182 * while in user mode we want it to be based on virtual addresses.
 183 *
 184 * TODO: For user mode, see the caveat re host vs guest virtual
 185 * address spaces near GUEST_ADDR_MAX.
 186 */
 187#if !defined(CONFIG_USER_ONLY)
 188#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 189# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 190#else
 191# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 192#endif
 193#else
 194# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
 195#endif
 196
 197/* Size of the L2 (and L3, etc) page tables.  */
 198#define V_L2_BITS 10
 199#define V_L2_SIZE (1 << V_L2_BITS)
 200
 201/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
 202QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
 203                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
 204                  * BITS_PER_BYTE);
 205
 206/*
 207 * L1 Mapping properties
 208 */
 209static int v_l1_size;
 210static int v_l1_shift;
 211static int v_l2_levels;
 212
 213/* The bottom level has pointers to PageDesc, and is indexed by
 214 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 215 */
 216#define V_L1_MIN_BITS 4
 217#define V_L1_MAX_BITS (V_L2_BITS + 3)
 218#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
 219
 220static void *l1_map[V_L1_MAX_SIZE];
 221
 222TBContext tb_ctx;
 223
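/*
 * Worked example (illustrative numbers only): with L1_MAP_ADDR_SPACE_BITS == 32
 * and TARGET_PAGE_BITS == 12 there are 20 bits left to map; 20 % V_L2_BITS == 0
 * is below V_L1_MIN_BITS, so v_l1_bits becomes 10, giving v_l1_size == 1024,
 * v_l1_shift == 10 and v_l2_levels == 0 (l1_map entries then point directly
 * at PageDesc arrays, with no intermediate levels).
 */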
 224static void page_table_config_init(void)
 225{
 226    uint32_t v_l1_bits;
 227
 228    assert(TARGET_PAGE_BITS);
 229    /* The bits remaining after N lower levels of page tables.  */
 230    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
 231    if (v_l1_bits < V_L1_MIN_BITS) {
 232        v_l1_bits += V_L2_BITS;
 233    }
 234
 235    v_l1_size = 1 << v_l1_bits;
 236    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
 237    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
 238
 239    assert(v_l1_bits <= V_L1_MAX_BITS);
 240    assert(v_l1_shift % V_L2_BITS == 0);
 241    assert(v_l2_levels >= 0);
 242}
 243
 244/* Encode VAL as a signed leb128 sequence at P.
 245   Return P incremented past the encoded value.  */
 246static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 247{
 248    int more, byte;
 249
 250    do {
 251        byte = val & 0x7f;
 252        val >>= 7;
 253        more = !((val == 0 && (byte & 0x40) == 0)
 254                 || (val == -1 && (byte & 0x40) != 0));
 255        if (more) {
 256            byte |= 0x80;
 257        }
 258        *p++ = byte;
 259    } while (more);
 260
 261    return p;
 262}
 263
 264/* Decode a signed leb128 sequence at *PP; increment *PP past the
 265   decoded value.  Return the decoded value.  */
 266static target_long decode_sleb128(const uint8_t **pp)
 267{
 268    const uint8_t *p = *pp;
 269    target_long val = 0;
 270    int byte, shift = 0;
 271
 272    do {
 273        byte = *p++;
 274        val |= (target_ulong)(byte & 0x7f) << shift;
 275        shift += 7;
 276    } while (byte & 0x80);
 277    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
 278        val |= -(target_ulong)1 << shift;
 279    }
 280
 281    *pp = p;
 282    return val;
 283}
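/*
 * Illustration: 129 encodes to the two bytes { 0x81, 0x01 } and -2 encodes to
 * the single byte { 0x7e }; decode_sleb128() reverses either sequence, using
 * bit 6 of the final byte to sign-extend the result.
 */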
 284
 285/* Encode the data collected about the instructions while compiling TB.
 286   Place the data at BLOCK, and return the number of bytes consumed.
 287
 288   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
 289   which come from the target's insn_start data, followed by a uintptr_t
 290   which comes from the host pc of the end of the code implementing the insn.
 291
 292   Each line of the table is encoded as sleb128 deltas from the previous
 293   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
 294   That is, the first column is seeded with the guest pc, the last column
 295   with the host pc, and the middle columns with zeros.  */
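/* For example, on a target where TARGET_INSN_START_WORDS is 2 (a value chosen
   purely for illustration), each row holds the sleb128-encoded deltas of
   { pc, extra insn_start word, host end-of-insn offset } from the previous row.  */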
 296
 297static int encode_search(TranslationBlock *tb, uint8_t *block)
 298{
 299    uint8_t *highwater = tcg_ctx->code_gen_highwater;
 300    uint8_t *p = block;
 301    int i, j, n;
 302
 303    for (i = 0, n = tb->icount; i < n; ++i) {
 304        target_ulong prev;
 305
 306        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 307            if (i == 0) {
 308                prev = (j == 0 ? tb->pc : 0);
 309            } else {
 310                prev = tcg_ctx->gen_insn_data[i - 1][j];
 311            }
 312            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
 313        }
 314        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
 315        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
 316
 317        /* Test for (pending) buffer overflow.  The assumption is that any
 318           one row beginning below the high water mark cannot overrun
 319           the buffer completely.  Thus we can test for overflow after
 320           encoding a row without having to check during encoding.  */
 321        if (unlikely(p > highwater)) {
 322            return -1;
 323        }
 324    }
 325
 326    return p - block;
 327}
 328
 329/* The cpu state corresponding to 'searched_pc' is restored.
 330 * When reset_icount is true, the current TB will be interrupted and
 331 * the icount recalculated.
 332 */
 333static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 334                                     uintptr_t searched_pc, bool reset_icount)
 335{
 336    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
 337    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
 338    CPUArchState *env = cpu->env_ptr;
 339    const uint8_t *p = tb->tc.ptr + tb->tc.size;
 340    int i, j, num_insns = tb->icount;
 341#ifdef CONFIG_PROFILER
 342    TCGProfile *prof = &tcg_ctx->prof;
 343    int64_t ti = profile_getclock();
 344#endif
 345
 346    searched_pc -= GETPC_ADJ;
 347
 348    if (searched_pc < host_pc) {
 349        return -1;
 350    }
 351
 352    /* Reconstruct the stored insn data while looking for the point at
 353       which the end of the insn exceeds the searched_pc.  */
 354    for (i = 0; i < num_insns; ++i) {
 355        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 356            data[j] += decode_sleb128(&p);
 357        }
 358        host_pc += decode_sleb128(&p);
 359        if (host_pc > searched_pc) {
 360            goto found;
 361        }
 362    }
 363    return -1;
 364
 365 found:
 366    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
 367        assert(icount_enabled());
 368        /* Reset the cycle counter to the start of the block
 369           and shift it to the number of actually executed instructions */
 370        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
 371    }
 372    restore_state_to_opc(env, tb, data);
 373
 374#ifdef CONFIG_PROFILER
 375    qatomic_set(&prof->restore_time,
 376                prof->restore_time + profile_getclock() - ti);
 377    qatomic_set(&prof->restore_count, prof->restore_count + 1);
 378#endif
 379    return 0;
 380}
 381
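/*
 * The host_pc passed in is typically the value of GETPC() captured in a
 * helper, or the faulting host PC taken from a signal frame; either way it
 * must point into the generated code for the lookup below to succeed.
 */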
 382bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
 383{
 384    /*
 385     * The host_pc has to be in the rx region of the code buffer.
 386     * If it is not, we will not be able to resolve it here.
 387     * The two cases where host_pc will not be correct are:
 388     *
 389     *  - fault during translation (instruction fetch)
 390     *  - fault from helper (not using GETPC() macro)
 391     *
 392     * Either way we need to return early, as we can't resolve it here.
 393     */
 394    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 395        TranslationBlock *tb = tcg_tb_lookup(host_pc);
 396        if (tb) {
 397            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
 398            return true;
 399        }
 400    }
 401    return false;
 402}
 403
 404void page_init(void)
 405{
 406    page_size_init();
 407    page_table_config_init();
 408
 409#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 410    {
 411#ifdef HAVE_KINFO_GETVMMAP
 412        struct kinfo_vmentry *freep;
 413        int i, cnt;
 414
 415        freep = kinfo_getvmmap(getpid(), &cnt);
 416        if (freep) {
 417            mmap_lock();
 418            for (i = 0; i < cnt; i++) {
 419                unsigned long startaddr, endaddr;
 420
 421                startaddr = freep[i].kve_start;
 422                endaddr = freep[i].kve_end;
 423                if (h2g_valid(startaddr)) {
 424                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 425
 426                    if (h2g_valid(endaddr)) {
 427                        endaddr = h2g(endaddr);
 428                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 429                    } else {
 430#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 431                        endaddr = ~0ul;
 432                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 433#endif
 434                    }
 435                }
 436            }
 437            free(freep);
 438            mmap_unlock();
 439        }
 440#else
 441        FILE *f;
 442
 443        last_brk = (unsigned long)sbrk(0);
 444
 445        f = fopen("/compat/linux/proc/self/maps", "r");
 446        if (f) {
 447            mmap_lock();
 448
 449            do {
 450                unsigned long startaddr, endaddr;
 451                int n;
 452
 453                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 454
 455                if (n == 2 && h2g_valid(startaddr)) {
 456                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 457
 458                    if (h2g_valid(endaddr)) {
 459                        endaddr = h2g(endaddr);
 460                    } else {
 461                        endaddr = ~0ul;
 462                    }
 463                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 464                }
 465            } while (!feof(f));
 466
 467            fclose(f);
 468            mmap_unlock();
 469        }
 470#endif
 471    }
 472#endif
 473}
 474
 475static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 476{
 477    PageDesc *pd;
 478    void **lp;
 479    int i;
 480
 481    /* Level 1.  Always allocated.  */
 482    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
 483
 484    /* Level 2..N-1.  */
 485    for (i = v_l2_levels; i > 0; i--) {
 486        void **p = qatomic_rcu_read(lp);
 487
 488        if (p == NULL) {
 489            void *existing;
 490
 491            if (!alloc) {
 492                return NULL;
 493            }
 494            p = g_new0(void *, V_L2_SIZE);
 495            existing = qatomic_cmpxchg(lp, NULL, p);
 496            if (unlikely(existing)) {
 497                g_free(p);
 498                p = existing;
 499            }
 500        }
 501
 502        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
 503    }
 504
 505    pd = qatomic_rcu_read(lp);
 506    if (pd == NULL) {
 507        void *existing;
 508
 509        if (!alloc) {
 510            return NULL;
 511        }
 512        pd = g_new0(PageDesc, V_L2_SIZE);
 513#ifndef CONFIG_USER_ONLY
 514        {
 515            int i;
 516
 517            for (i = 0; i < V_L2_SIZE; i++) {
 518                qemu_spin_init(&pd[i].lock);
 519            }
 520        }
 521#endif
 522        existing = qatomic_cmpxchg(lp, NULL, pd);
 523        if (unlikely(existing)) {
 524#ifndef CONFIG_USER_ONLY
 525            {
 526                int i;
 527
 528                for (i = 0; i < V_L2_SIZE; i++) {
 529                    qemu_spin_destroy(&pd[i].lock);
 530                }
 531            }
 532#endif
 533            g_free(pd);
 534            pd = existing;
 535        }
 536    }
 537
 538    return pd + (index & (V_L2_SIZE - 1));
 539}
 540
 541static inline PageDesc *page_find(tb_page_addr_t index)
 542{
 543    return page_find_alloc(index, 0);
 544}
 545
 546static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 547                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
 548
 549/* In user-mode, page locks aren't used; mmap_lock is enough */
 550#ifdef CONFIG_USER_ONLY
 551
 552#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
 553
 554static inline void page_lock(PageDesc *pd)
 555{ }
 556
 557static inline void page_unlock(PageDesc *pd)
 558{ }
 559
 560static inline void page_lock_tb(const TranslationBlock *tb)
 561{ }
 562
 563static inline void page_unlock_tb(const TranslationBlock *tb)
 564{ }
 565
 566struct page_collection *
 567page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 568{
 569    return NULL;
 570}
 571
 572void page_collection_unlock(struct page_collection *set)
 573{ }
 574#else /* !CONFIG_USER_ONLY */
 575
 576#ifdef CONFIG_DEBUG_TCG
 577
 578static __thread GHashTable *ht_pages_locked_debug;
 579
 580static void ht_pages_locked_debug_init(void)
 581{
 582    if (ht_pages_locked_debug) {
 583        return;
 584    }
 585    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
 586}
 587
 588static bool page_is_locked(const PageDesc *pd)
 589{
 590    PageDesc *found;
 591
 592    ht_pages_locked_debug_init();
 593    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
 594    return !!found;
 595}
 596
 597static void page_lock__debug(PageDesc *pd)
 598{
 599    ht_pages_locked_debug_init();
 600    g_assert(!page_is_locked(pd));
 601    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
 602}
 603
 604static void page_unlock__debug(const PageDesc *pd)
 605{
 606    bool removed;
 607
 608    ht_pages_locked_debug_init();
 609    g_assert(page_is_locked(pd));
 610    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
 611    g_assert(removed);
 612}
 613
 614static void
 615do_assert_page_locked(const PageDesc *pd, const char *file, int line)
 616{
 617    if (unlikely(!page_is_locked(pd))) {
 618        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
 619                     pd, file, line);
 620        abort();
 621    }
 622}
 623
 624#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
 625
 626void assert_no_pages_locked(void)
 627{
 628    ht_pages_locked_debug_init();
 629    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
 630}
 631
 632#else /* !CONFIG_DEBUG_TCG */
 633
 634#define assert_page_locked(pd)
 635
 636static inline void page_lock__debug(const PageDesc *pd)
 637{
 638}
 639
 640static inline void page_unlock__debug(const PageDesc *pd)
 641{
 642}
 643
 644#endif /* CONFIG_DEBUG_TCG */
 645
 646static inline void page_lock(PageDesc *pd)
 647{
 648    page_lock__debug(pd);
 649    qemu_spin_lock(&pd->lock);
 650}
 651
 652static inline void page_unlock(PageDesc *pd)
 653{
 654    qemu_spin_unlock(&pd->lock);
 655    page_unlock__debug(pd);
 656}
 657
 658/* lock the page(s) of a TB in the correct acquisition order */
 659static inline void page_lock_tb(const TranslationBlock *tb)
 660{
 661    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
 662}
 663
 664static inline void page_unlock_tb(const TranslationBlock *tb)
 665{
 666    PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 667
 668    page_unlock(p1);
 669    if (unlikely(tb->page_addr[1] != -1)) {
 670        PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 671
 672        if (p2 != p1) {
 673            page_unlock(p2);
 674        }
 675    }
 676}
 677
 678static inline struct page_entry *
 679page_entry_new(PageDesc *pd, tb_page_addr_t index)
 680{
 681    struct page_entry *pe = g_malloc(sizeof(*pe));
 682
 683    pe->index = index;
 684    pe->pd = pd;
 685    pe->locked = false;
 686    return pe;
 687}
 688
 689static void page_entry_destroy(gpointer p)
 690{
 691    struct page_entry *pe = p;
 692
 693    g_assert(pe->locked);
 694    page_unlock(pe->pd);
 695    g_free(pe);
 696}
 697
 698/* returns false on success */
 699static bool page_entry_trylock(struct page_entry *pe)
 700{
 701    bool busy;
 702
 703    busy = qemu_spin_trylock(&pe->pd->lock);
 704    if (!busy) {
 705        g_assert(!pe->locked);
 706        pe->locked = true;
 707        page_lock__debug(pe->pd);
 708    }
 709    return busy;
 710}
 711
 712static void do_page_entry_lock(struct page_entry *pe)
 713{
 714    page_lock(pe->pd);
 715    g_assert(!pe->locked);
 716    pe->locked = true;
 717}
 718
 719static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
 720{
 721    struct page_entry *pe = value;
 722
 723    do_page_entry_lock(pe);
 724    return FALSE;
 725}
 726
 727static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
 728{
 729    struct page_entry *pe = value;
 730
 731    if (pe->locked) {
 732        pe->locked = false;
 733        page_unlock(pe->pd);
 734    }
 735    return FALSE;
 736}
 737
 738/*
 739 * Trylock a page, and if successful, add the page to a collection.
 740 * Returns true ("busy") if the page could not be locked; false otherwise.
 741 */
 742static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
 743{
 744    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
 745    struct page_entry *pe;
 746    PageDesc *pd;
 747
 748    pe = g_tree_lookup(set->tree, &index);
 749    if (pe) {
 750        return false;
 751    }
 752
 753    pd = page_find(index);
 754    if (pd == NULL) {
 755        return false;
 756    }
 757
 758    pe = page_entry_new(pd, index);
 759    g_tree_insert(set->tree, &pe->index, pe);
 760
 761    /*
 762     * If this is either (1) the first insertion or (2) a page whose index
 763     * is higher than any other so far, just lock the page and move on.
 764     */
 765    if (set->max == NULL || pe->index > set->max->index) {
 766        set->max = pe;
 767        do_page_entry_lock(pe);
 768        return false;
 769    }
 770    /*
 771     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
 772     * locks in order.
 773     */
 774    return page_entry_trylock(pe);
 775}
 776
 777static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
 778{
 779    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
 780    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
 781
 782    if (a == b) {
 783        return 0;
 784    } else if (a < b) {
 785        return -1;
 786    }
 787    return 1;
 788}
 789
 790/*
 791 * Lock a range of pages ([@start,@end[) as well as the pages of all
 792 * intersecting TBs.
 793 * Locking order: acquire locks in ascending order of page index.
 794 */
 795struct page_collection *
 796page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 797{
 798    struct page_collection *set = g_malloc(sizeof(*set));
 799    tb_page_addr_t index;
 800    PageDesc *pd;
 801
 802    start >>= TARGET_PAGE_BITS;
 803    end   >>= TARGET_PAGE_BITS;
 804    g_assert(start <= end);
 805
 806    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
 807                                page_entry_destroy);
 808    set->max = NULL;
 809    assert_no_pages_locked();
 810
 811 retry:
 812    g_tree_foreach(set->tree, page_entry_lock, NULL);
 813
 814    for (index = start; index <= end; index++) {
 815        TranslationBlock *tb;
 816        int n;
 817
 818        pd = page_find(index);
 819        if (pd == NULL) {
 820            continue;
 821        }
 822        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
 823            g_tree_foreach(set->tree, page_entry_unlock, NULL);
 824            goto retry;
 825        }
 826        assert_page_locked(pd);
 827        PAGE_FOR_EACH_TB(pd, tb, n) {
 828            if (page_trylock_add(set, tb->page_addr[0]) ||
 829                (tb->page_addr[1] != -1 &&
 830                 page_trylock_add(set, tb->page_addr[1]))) {
 831                /* drop all locks, and reacquire in order */
 832                g_tree_foreach(set->tree, page_entry_unlock, NULL);
 833                goto retry;
 834            }
 835        }
 836    }
 837    return set;
 838}
 839
 840void page_collection_unlock(struct page_collection *set)
 841{
 842    /* entries are unlocked and freed via page_entry_destroy */
 843    g_tree_destroy(set->tree);
 844    g_free(set);
 845}
 846
 847#endif /* !CONFIG_USER_ONLY */
 848
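/*
 * Look up (and, if @alloc is set, allocate) the PageDescs for the one or two
 * pages backing a TB and lock them.  @phys2 == -1 means there is no second
 * page.  Locks are always taken in ascending order of page index, matching
 * the ordering rule used by page_collection_lock().
 */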
 849static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 850                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
 851{
 852    PageDesc *p1, *p2;
 853    tb_page_addr_t page1;
 854    tb_page_addr_t page2;
 855
 856    assert_memory_lock();
 857    g_assert(phys1 != -1);
 858
 859    page1 = phys1 >> TARGET_PAGE_BITS;
 860    page2 = phys2 >> TARGET_PAGE_BITS;
 861
 862    p1 = page_find_alloc(page1, alloc);
 863    if (ret_p1) {
 864        *ret_p1 = p1;
 865    }
 866    if (likely(phys2 == -1)) {
 867        page_lock(p1);
 868        return;
 869    } else if (page1 == page2) {
 870        page_lock(p1);
 871        if (ret_p2) {
 872            *ret_p2 = p1;
 873        }
 874        return;
 875    }
 876    p2 = page_find_alloc(page2, alloc);
 877    if (ret_p2) {
 878        *ret_p2 = p2;
 879    }
 880    if (page1 < page2) {
 881        page_lock(p1);
 882        page_lock(p2);
 883    } else {
 884        page_lock(p2);
 885        page_lock(p1);
 886    }
 887}
 888
 889static bool tb_cmp(const void *ap, const void *bp)
 890{
 891    const TranslationBlock *a = ap;
 892    const TranslationBlock *b = bp;
 893
 894    return a->pc == b->pc &&
 895        a->cs_base == b->cs_base &&
 896        a->flags == b->flags &&
 897        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
 898        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
 899        a->page_addr[0] == b->page_addr[0] &&
 900        a->page_addr[1] == b->page_addr[1];
 901}
 902
 903void tb_htable_init(void)
 904{
 905    unsigned int mode = QHT_MODE_AUTO_RESIZE;
 906
 907    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
 908}
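/*
 * TBs are inserted into tb_ctx.htable hashed by tb_hash_func(phys_pc, pc,
 * flags, cflags, trace_vcpu_dstate); tb_cmp() above resolves collisions by
 * comparing the full lookup key, with CF_INVALID masked out of cflags.
 */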
 909
 910/* call with @p->lock held */
 911static inline void invalidate_page_bitmap(PageDesc *p)
 912{
 913    assert_page_locked(p);
 914#ifdef CONFIG_SOFTMMU
 915    g_free(p->code_bitmap);
 916    p->code_bitmap = NULL;
 917    p->code_write_count = 0;
 918#endif
 919}
 920
 921/* Set to NULL all the 'first_tb' fields in all PageDescs. */
 922static void page_flush_tb_1(int level, void **lp)
 923{
 924    int i;
 925
 926    if (*lp == NULL) {
 927        return;
 928    }
 929    if (level == 0) {
 930        PageDesc *pd = *lp;
 931
 932        for (i = 0; i < V_L2_SIZE; ++i) {
 933            page_lock(&pd[i]);
 934            pd[i].first_tb = (uintptr_t)NULL;
 935            invalidate_page_bitmap(pd + i);
 936            page_unlock(&pd[i]);
 937        }
 938    } else {
 939        void **pp = *lp;
 940
 941        for (i = 0; i < V_L2_SIZE; ++i) {
 942            page_flush_tb_1(level - 1, pp + i);
 943        }
 944    }
 945}
 946
 947static void page_flush_tb(void)
 948{
 949    int i, l1_sz = v_l1_size;
 950
 951    for (i = 0; i < l1_sz; i++) {
 952        page_flush_tb_1(v_l2_levels, l1_map + i);
 953    }
 954}
 955
 956static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
 957{
 958    const TranslationBlock *tb = value;
 959    size_t *size = data;
 960
 961    *size += tb->tc.size;
 962    return false;
 963}
 964
 965/* flush all the translation blocks */
 966static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 967{
 968    bool did_flush = false;
 969
 970    mmap_lock();
 971    /* If it has already been done on request of another CPU,
 972     * just retry.
 973     */
 974    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
 975        goto done;
 976    }
 977    did_flush = true;
 978
 979    if (DEBUG_TB_FLUSH_GATE) {
 980        size_t nb_tbs = tcg_nb_tbs();
 981        size_t host_size = 0;
 982
 983        tcg_tb_foreach(tb_host_size_iter, &host_size);
 984        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
 985               tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
 986    }
 987
 988    CPU_FOREACH(cpu) {
 989        cpu_tb_jmp_cache_clear(cpu);
 990    }
 991
 992    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 993    page_flush_tb();
 994
 995    tcg_region_reset_all();
 996    /* XXX: flush processor icache at this point if cache flush is
 997       expensive */
 998    qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
 999
1000done:
1001    mmap_unlock();
1002    if (did_flush) {
1003        qemu_plugin_flush_cb();
1004    }
1005}
1006
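/*
 * tb_ctx.tb_flush_count acts as a generation number: the value read below is
 * handed to do_tb_flush(), which flushes only if the count is still the same,
 * so concurrent flush requests collapse into a single flush.
 */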
1007void tb_flush(CPUState *cpu)
1008{
1009    if (tcg_enabled()) {
1010        unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
1011
1012        if (cpu_in_exclusive_context(cpu)) {
1013            do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1014        } else {
1015            async_safe_run_on_cpu(cpu, do_tb_flush,
1016                                  RUN_ON_CPU_HOST_INT(tb_flush_count));
1017        }
1018    }
1019}
1020
1021/*
1022 * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1023 * so in order to prevent bit rot we compile them unconditionally in user-mode,
1024 * and let the optimizer get rid of them by wrapping their user-only callers
1025 * with if (DEBUG_TB_CHECK_GATE).
1026 */
1027#ifdef CONFIG_USER_ONLY
1028
1029static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1030{
1031    TranslationBlock *tb = p;
1032    target_ulong addr = *(target_ulong *)userp;
1033
1034    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1035        printf("ERROR invalidate: address=" TARGET_FMT_lx
1036               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1037    }
1038}
1039
1040/* verify that all the pages have correct rights for code
1041 *
1042 * Called with mmap_lock held.
1043 */
1044static void tb_invalidate_check(target_ulong address)
1045{
1046    address &= TARGET_PAGE_MASK;
1047    qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1048}
1049
1050static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1051{
1052    TranslationBlock *tb = p;
1053    int flags1, flags2;
1054
1055    flags1 = page_get_flags(tb->pc);
1056    flags2 = page_get_flags(tb->pc + tb->size - 1);
1057    if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1058        printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1059               (long)tb->pc, tb->size, flags1, flags2);
1060    }
1061}
1062
1063/* verify that all the pages have correct rights for code */
1064static void tb_page_check(void)
1065{
1066    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1067}
1068
1069#endif /* CONFIG_USER_ONLY */
1070
1071/*
1072 * user-mode: call with mmap_lock held
1073 * !user-mode: call with @pd->lock held
1074 */
1075static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1076{
1077    TranslationBlock *tb1;
1078    uintptr_t *pprev;
1079    unsigned int n1;
1080
1081    assert_page_locked(pd);
1082    pprev = &pd->first_tb;
1083    PAGE_FOR_EACH_TB(pd, tb1, n1) {
1084        if (tb1 == tb) {
1085            *pprev = tb1->page_next[n1];
1086            return;
1087        }
1088        pprev = &tb1->page_next[n1];
1089    }
1090    g_assert_not_reached();
1091}
1092
1093/* remove @orig from its @n_orig-th jump list */
1094static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1095{
1096    uintptr_t ptr, ptr_locked;
1097    TranslationBlock *dest;
1098    TranslationBlock *tb;
1099    uintptr_t *pprev;
1100    int n;
1101
1102    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1103    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1104    dest = (TranslationBlock *)(ptr & ~1);
1105    if (dest == NULL) {
1106        return;
1107    }
1108
1109    qemu_spin_lock(&dest->jmp_lock);
1110    /*
1111     * While acquiring the lock, the jump might have been removed if the
1112     * destination TB was invalidated; check again.
1113     */
1114    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
1115    if (ptr_locked != ptr) {
1116        qemu_spin_unlock(&dest->jmp_lock);
1117        /*
1118         * The only possibility is that the jump was unlinked via
1119         * tb_jump_unlink(dest). Seeing another destination here would be a bug,
1120         * because we set the LSB above.
1121         */
1122        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1123        return;
1124    }
1125    /*
1126     * We first acquired the lock, and since the destination pointer matches,
1127     * we know for sure that @orig is in the jmp list.
1128     */
1129    pprev = &dest->jmp_list_head;
1130    TB_FOR_EACH_JMP(dest, tb, n) {
1131        if (tb == orig && n == n_orig) {
1132            *pprev = tb->jmp_list_next[n];
1133            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1134            qemu_spin_unlock(&dest->jmp_lock);
1135            return;
1136        }
1137        pprev = &tb->jmp_list_next[n];
1138    }
1139    g_assert_not_reached();
1140}
1141
1142/* reset the jump entry 'n' of a TB so that it is not chained to
1143   another TB */
1144static inline void tb_reset_jump(TranslationBlock *tb, int n)
1145{
1146    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1147    tb_set_jmp_target(tb, n, addr);
1148}
1149
1150/* remove any jumps to the TB */
1151static inline void tb_jmp_unlink(TranslationBlock *dest)
1152{
1153    TranslationBlock *tb;
1154    int n;
1155
1156    qemu_spin_lock(&dest->jmp_lock);
1157
1158    TB_FOR_EACH_JMP(dest, tb, n) {
1159        tb_reset_jump(tb, n);
1160        qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1161        /* No need to clear the list entry; setting the dest ptr is enough */
1162    }
1163    dest->jmp_list_head = (uintptr_t)NULL;
1164
1165    qemu_spin_unlock(&dest->jmp_lock);
1166}
1167
1168/*
1169 * In user-mode, call with mmap_lock held.
1170 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1171 * locks held.
1172 */
1173static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1174{
1175    CPUState *cpu;
1176    PageDesc *p;
1177    uint32_t h;
1178    tb_page_addr_t phys_pc;
1179    uint32_t orig_cflags = tb_cflags(tb);
1180
1181    assert_memory_lock();
1182
1183    /* make sure no further incoming jumps will be chained to this TB */
1184    qemu_spin_lock(&tb->jmp_lock);
1185    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1186    qemu_spin_unlock(&tb->jmp_lock);
1187
1188    /* remove the TB from the hash list */
1189    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1190    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
1191                     tb->trace_vcpu_dstate);
1192    if (!qht_remove(&tb_ctx.htable, tb, h)) {
1193        return;
1194    }
1195
1196    /* remove the TB from the page list */
1197    if (rm_from_page_list) {
1198        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1199        tb_page_remove(p, tb);
1200        invalidate_page_bitmap(p);
1201        if (tb->page_addr[1] != -1) {
1202            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1203            tb_page_remove(p, tb);
1204            invalidate_page_bitmap(p);
1205        }
1206    }
1207
1208    /* remove the TB from each CPU's tb_jmp_cache */
1209    h = tb_jmp_cache_hash_func(tb->pc);
1210    CPU_FOREACH(cpu) {
1211        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1212            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
1213        }
1214    }
1215
1216    /* suppress this TB from the two jump lists */
1217    tb_remove_from_jmp_list(tb, 0);
1218    tb_remove_from_jmp_list(tb, 1);
1219
1220    /* suppress any remaining jumps to this TB */
1221    tb_jmp_unlink(tb);
1222
1223    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
1224                tb_ctx.tb_phys_invalidate_count + 1);
1225}
1226
1227static void tb_phys_invalidate__locked(TranslationBlock *tb)
1228{
1229    qemu_thread_jit_write();
1230    do_tb_phys_invalidate(tb, true);
1231    qemu_thread_jit_execute();
1232}
1233
1234/* invalidate one TB
1235 *
1236 * Called with mmap_lock held in user-mode.
1237 */
1238void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1239{
1240    if (page_addr == -1 && tb->page_addr[0] != -1) {
1241        page_lock_tb(tb);
1242        do_tb_phys_invalidate(tb, true);
1243        page_unlock_tb(tb);
1244    } else {
1245        do_tb_phys_invalidate(tb, false);
1246    }
1247}
1248
1249#ifdef CONFIG_SOFTMMU
1250/* call with @p->lock held */
1251static void build_page_bitmap(PageDesc *p)
1252{
1253    int n, tb_start, tb_end;
1254    TranslationBlock *tb;
1255
1256    assert_page_locked(p);
1257    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1258
1259    PAGE_FOR_EACH_TB(p, tb, n) {
1260        /* NOTE: this is subtle as a TB may span two physical pages */
1261        if (n == 0) {
1262            /* NOTE: tb_end may be after the end of the page, but
1263               it is not a problem */
1264            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1265            tb_end = tb_start + tb->size;
1266            if (tb_end > TARGET_PAGE_SIZE) {
1267                tb_end = TARGET_PAGE_SIZE;
1268            }
1269        } else {
1270            tb_start = 0;
1271            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1272        }
1273        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1274    }
1275}
1276#endif
1277
1278/* add the tb to the target page and protect it if necessary
1279 *
1280 * Called with mmap_lock held for user-mode emulation.
1281 * Called with @p->lock held in !user-mode.
1282 */
1283static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1284                               unsigned int n, tb_page_addr_t page_addr)
1285{
1286#ifndef CONFIG_USER_ONLY
1287    bool page_already_protected;
1288#endif
1289
1290    assert_page_locked(p);
1291
1292    tb->page_addr[n] = page_addr;
1293    tb->page_next[n] = p->first_tb;
1294#ifndef CONFIG_USER_ONLY
1295    page_already_protected = p->first_tb != (uintptr_t)NULL;
1296#endif
1297    p->first_tb = (uintptr_t)tb | n;
1298    invalidate_page_bitmap(p);
1299
1300#if defined(CONFIG_USER_ONLY)
1301    /* translator_loop() must have made all TB pages non-writable */
1302    assert(!(p->flags & PAGE_WRITE));
1303#else
1304    /* if some code is already present, then the pages are already
1305       protected. So we handle the case where only the first TB is
1306       allocated in a physical page */
1307    if (!page_already_protected) {
1308        tlb_protect_code(page_addr);
1309    }
1310#endif
1311}
1312
1313/*
1314 * Add a new TB and link it to the physical page tables. phys_page2 is
1315 * (-1) to indicate that only one page contains the TB.
1316 *
1317 * Called with mmap_lock held for user-mode emulation.
1318 *
1319 * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
1320 * Note that in !user-mode, another thread might have already added a TB
1321 * for the same block of guest code that @tb corresponds to. In that case,
1322 * the caller should discard the original @tb, and use instead the returned TB.
1323 */
1324static TranslationBlock *
1325tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1326             tb_page_addr_t phys_page2)
1327{
1328    PageDesc *p;
1329    PageDesc *p2 = NULL;
1330    void *existing_tb = NULL;
1331    uint32_t h;
1332
1333    assert_memory_lock();
1334    tcg_debug_assert(!(tb->cflags & CF_INVALID));
1335
1336    /*
1337     * Add the TB to the page list, acquiring the pages' locks first.
1338     * We keep the locks held until after inserting the TB in the hash table,
1339     * so that if the insertion fails we know for sure that the TBs are still
1340     * in the page descriptors.
1341     * Note that inserting into the hash table first isn't an option, since
1342     * we can only insert TBs that are fully initialized.
1343     */
1344    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1345    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1346    if (p2) {
1347        tb_page_add(p2, tb, 1, phys_page2);
1348    } else {
1349        tb->page_addr[1] = -1;
1350    }
1351
1352    /* add in the hash table */
1353    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
1354                     tb->trace_vcpu_dstate);
1355    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1356
1357    /* remove TB from the page(s) if we couldn't insert it */
1358    if (unlikely(existing_tb)) {
1359        tb_page_remove(p, tb);
1360        invalidate_page_bitmap(p);
1361        if (p2) {
1362            tb_page_remove(p2, tb);
1363            invalidate_page_bitmap(p2);
1364        }
1365        tb = existing_tb;
1366    }
1367
1368    if (p2 && p2 != p) {
1369        page_unlock(p2);
1370    }
1371    page_unlock(p);
1372
1373#ifdef CONFIG_USER_ONLY
1374    if (DEBUG_TB_CHECK_GATE) {
1375        tb_page_check();
1376    }
1377#endif
1378    return tb;
1379}
1380
1381/* Called with mmap_lock held for user mode emulation.  */
1382TranslationBlock *tb_gen_code(CPUState *cpu,
1383                              target_ulong pc, target_ulong cs_base,
1384                              uint32_t flags, int cflags)
1385{
1386    CPUArchState *env = cpu->env_ptr;
1387    TranslationBlock *tb, *existing_tb;
1388    tb_page_addr_t phys_pc, phys_page2;
1389    target_ulong virt_page2;
1390    tcg_insn_unit *gen_code_buf;
1391    int gen_code_size, search_size, max_insns;
1392#ifdef CONFIG_PROFILER
1393    TCGProfile *prof = &tcg_ctx->prof;
1394    int64_t ti;
1395#endif
1396
1397    assert_memory_lock();
1398    qemu_thread_jit_write();
1399
1400    phys_pc = get_page_addr_code(env, pc);
1401
1402    if (phys_pc == -1) {
1403        /* Generate a one-shot TB with 1 insn in it */
1404        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
1405    }
1406
1407    max_insns = cflags & CF_COUNT_MASK;
1408    if (max_insns == 0) {
1409        max_insns = TCG_MAX_INSNS;
1410    }
1411    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
1412
1413 buffer_overflow:
1414    tb = tcg_tb_alloc(tcg_ctx);
1415    if (unlikely(!tb)) {
1416        /* flush must be done */
1417        tb_flush(cpu);
1418        mmap_unlock();
1419        /* Make the execution loop process the flush as soon as possible.  */
1420        cpu->exception_index = EXCP_INTERRUPT;
1421        cpu_loop_exit(cpu);
1422    }
1423
1424    gen_code_buf = tcg_ctx->code_gen_ptr;
1425    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
1426    tb->pc = pc;
1427    tb->cs_base = cs_base;
1428    tb->flags = flags;
1429    tb->cflags = cflags;
1430    tb->trace_vcpu_dstate = *cpu->trace_dstate;
1431    tcg_ctx->tb_cflags = cflags;
1432 tb_overflow:
1433
1434#ifdef CONFIG_PROFILER
1435    /* includes aborted translations because of exceptions */
1436    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1437    ti = profile_getclock();
1438#endif
1439
1440    gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
1441    if (unlikely(gen_code_size != 0)) {
1442        goto error_return;
1443    }
1444
1445    tcg_func_start(tcg_ctx);
1446
1447    tcg_ctx->cpu = env_cpu(env);
1448    gen_intermediate_code(cpu, tb, max_insns);
1449    assert(tb->size != 0);
1450    tcg_ctx->cpu = NULL;
1451    max_insns = tb->icount;
1452
1453    trace_translate_block(tb, tb->pc, tb->tc.ptr);
1454
1455    /* generate machine code */
1456    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1457    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1458    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1459    if (TCG_TARGET_HAS_direct_jump) {
1460        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1461        tcg_ctx->tb_jmp_target_addr = NULL;
1462    } else {
1463        tcg_ctx->tb_jmp_insn_offset = NULL;
1464        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1465    }
1466
1467#ifdef CONFIG_PROFILER
1468    qatomic_set(&prof->tb_count, prof->tb_count + 1);
1469    qatomic_set(&prof->interm_time,
1470                prof->interm_time + profile_getclock() - ti);
1471    ti = profile_getclock();
1472#endif
1473
1474    gen_code_size = tcg_gen_code(tcg_ctx, tb);
1475    if (unlikely(gen_code_size < 0)) {
1476 error_return:
1477        switch (gen_code_size) {
1478        case -1:
1479            /*
1480             * Overflow of code_gen_buffer, or the current slice of it.
1481             *
1482             * TODO: We don't need to re-do gen_intermediate_code, nor
1483             * should we re-do the tcg optimization currently hidden
1484             * inside tcg_gen_code.  All that should be required is to
1485             * flush the TBs, allocate a new TB, re-initialize it per
1486             * above, and re-do the actual code generation.
1487             */
1488            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1489                          "Restarting code generation for "
1490                          "code_gen_buffer overflow\n");
1491            goto buffer_overflow;
1492
1493        case -2:
1494            /*
1495             * The code generated for the TranslationBlock is too large.
1496             * The maximum size allowed by the unwind info is 64k.
1497             * There may be stricter constraints from relocations
1498             * in the tcg backend.
1499             *
1500             * Try again with half as many insns as we attempted this time.
1501             * If a single insn overflows, there's a bug somewhere...
1502             */
1503            assert(max_insns > 1);
1504            max_insns /= 2;
1505            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
1506                          "Restarting code generation with "
1507                          "smaller translation block (max %d insns)\n",
1508                          max_insns);
1509            goto tb_overflow;
1510
1511        default:
1512            g_assert_not_reached();
1513        }
1514    }
1515    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1516    if (unlikely(search_size < 0)) {
1517        goto buffer_overflow;
1518    }
1519    tb->tc.size = gen_code_size;
1520
1521#ifdef CONFIG_PROFILER
1522    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1523    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1524    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1525    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1526#endif
1527
1528#ifdef DEBUG_DISAS
1529    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1530        qemu_log_in_addr_range(tb->pc)) {
1531        FILE *logfile = qemu_log_lock();
1532        int code_size, data_size;
1533        const tcg_target_ulong *rx_data_gen_ptr;
1534        size_t chunk_start;
1535        int insn = 0;
1536
1537        if (tcg_ctx->data_gen_ptr) {
1538            rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
1539            code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
1540            data_size = gen_code_size - code_size;
1541        } else {
1542            rx_data_gen_ptr = 0;
1543            code_size = gen_code_size;
1544            data_size = 0;
1545        }
1546
1547        /* Dump header and the first instruction */
1548        qemu_log("OUT: [size=%d]\n", gen_code_size);
1549        qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
1550                 tcg_ctx->gen_insn_data[insn][0]);
1551        chunk_start = tcg_ctx->gen_insn_end_off[insn];
1552        log_disas(tb->tc.ptr, chunk_start);
1553
1554        /*
1555         * Dump each instruction chunk, wrapping up empty chunks into
1556         * the next instruction. The whole array is offset so the
1557         * first entry is the beginning of the 2nd instruction.
1558         */
1559        while (insn < tb->icount) {
1560            size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
1561            if (chunk_end > chunk_start) {
1562                qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
1563                         tcg_ctx->gen_insn_data[insn][0]);
1564                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
1565                chunk_start = chunk_end;
1566            }
1567            insn++;
1568        }
1569
1570        if (chunk_start < code_size) {
1571            qemu_log("  -- tb slow paths + alignment\n");
1572            log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
1573        }
1574
1575        /* Finally dump any data we may have after the block */
1576        if (data_size) {
1577            int i;
1578            qemu_log("  data: [size=%d]\n", data_size);
1579            for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
1580                if (sizeof(tcg_target_ulong) == 8) {
1581                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
1582                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1583                } else if (sizeof(tcg_target_ulong) == 4) {
1584                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
1585                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
1586                } else {
1587                    qemu_build_not_reached();
1588                }
1589            }
1590        }
1591        qemu_log("\n");
1592        qemu_log_flush();
1593        qemu_log_unlock(logfile);
1594    }
1595#endif
1596
1597    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
1598        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1599                 CODE_GEN_ALIGN));
1600
1601    /* init jump list */
1602    qemu_spin_init(&tb->jmp_lock);
1603    tb->jmp_list_head = (uintptr_t)NULL;
1604    tb->jmp_list_next[0] = (uintptr_t)NULL;
1605    tb->jmp_list_next[1] = (uintptr_t)NULL;
1606    tb->jmp_dest[0] = (uintptr_t)NULL;
1607    tb->jmp_dest[1] = (uintptr_t)NULL;
1608
1609    /* init original jump addresses which have been set during tcg_gen_code() */
1610    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1611        tb_reset_jump(tb, 0);
1612    }
1613    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1614        tb_reset_jump(tb, 1);
1615    }
1616
1617    /*
1618     * If the TB is not associated with a physical RAM page then
1619     * it must be a temporary one-insn TB, and we have nothing to do
1620     * except fill in the page_addr[] fields. Return early before
1621     * attempting to link to other TBs or add to the lookup table.
1622     */
1623    if (phys_pc == -1) {
1624        tb->page_addr[0] = tb->page_addr[1] = -1;
1625        return tb;
1626    }
1627
1628    /*
1629     * Insert TB into the corresponding region tree before publishing it
1630     * through QHT. Otherwise a rewind that happens in the TB might fail to
1631     * look itself up using the host PC.
1632     */
1633    tcg_tb_insert(tb);
1634
1635    /* check next page if needed */
1636    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1637    phys_page2 = -1;
1638    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1639        phys_page2 = get_page_addr_code(env, virt_page2);
1640    }
1641    /*
1642     * No explicit memory barrier is required -- tb_link_page() makes the
1643     * TB visible in a consistent state.
1644     */
1645    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
1646    /* if the TB already exists, discard what we just translated */
1647    if (unlikely(existing_tb != tb)) {
1648        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1649
1650        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1651        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1652        tcg_tb_remove(tb);
1653        return existing_tb;
1654    }
1655    return tb;
1656}
1657
1658/*
1659 * @p must be non-NULL.
1660 * user-mode: call with mmap_lock held.
1661 * !user-mode: call with all @pages locked.
1662 */
1663static void
1664tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1665                                      PageDesc *p, tb_page_addr_t start,
1666                                      tb_page_addr_t end,
1667                                      uintptr_t retaddr)
1668{
1669    TranslationBlock *tb;
1670    tb_page_addr_t tb_start, tb_end;
1671    int n;
1672#ifdef TARGET_HAS_PRECISE_SMC
1673    CPUState *cpu = current_cpu;
1674    CPUArchState *env = NULL;
1675    bool current_tb_not_found = retaddr != 0;
1676    bool current_tb_modified = false;
1677    TranslationBlock *current_tb = NULL;
1678    target_ulong current_pc = 0;
1679    target_ulong current_cs_base = 0;
1680    uint32_t current_flags = 0;
1681#endif /* TARGET_HAS_PRECISE_SMC */
1682
1683    assert_page_locked(p);
1684
1685#if defined(TARGET_HAS_PRECISE_SMC)
1686    if (cpu != NULL) {
1687        env = cpu->env_ptr;
1688    }
1689#endif
1690
1691    /* we remove all the TBs in the range [start, end[ */
1692    /* XXX: see if in some cases it could be faster to invalidate all
1693       the code */
1694    PAGE_FOR_EACH_TB(p, tb, n) {
1695        assert_page_locked(p);
1696        /* NOTE: this is subtle as a TB may span two physical pages */
1697        if (n == 0) {
1698            /* NOTE: tb_end may be after the end of the page, but
1699               it is not a problem */
1700            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1701            tb_end = tb_start + tb->size;
1702        } else {
1703            tb_start = tb->page_addr[1];
1704            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1705        }
1706        if (!(tb_end <= start || tb_start >= end)) {
1707#ifdef TARGET_HAS_PRECISE_SMC
1708            if (current_tb_not_found) {
1709                current_tb_not_found = false;
1710                /* now we have a real cpu fault */
1711                current_tb = tcg_tb_lookup(retaddr);
1712            }
1713            if (current_tb == tb &&
1714                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1715                /*
1716                 * If we are modifying the current TB, we must stop
1717                 * its execution. We could be more precise by checking
1718                 * that the modification is after the current PC, but it
1719                 * would require a specialized function to partially
1720                 * restore the CPU state.
1721                 */
1722                current_tb_modified = true;
1723                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
1724                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1725                                     &current_flags);
1726            }
1727#endif /* TARGET_HAS_PRECISE_SMC */
1728            tb_phys_invalidate__locked(tb);
1729        }
1730    }
1731#if !defined(CONFIG_USER_ONLY)
1732    /* if no code remains, there is no need to keep using slow writes */
1733    if (!p->first_tb) {
1734        invalidate_page_bitmap(p);
1735        tlb_unprotect_code(start);
1736    }
1737#endif
1738#ifdef TARGET_HAS_PRECISE_SMC
1739    if (current_tb_modified) {
1740        page_collection_unlock(pages);
1741        /* Force execution of one insn next time.  */
1742        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1743        mmap_unlock();
1744        cpu_loop_exit_noexc(cpu);
1745    }
1746#endif
1747}
1748
1749/*
1750 * Invalidate all TBs which intersect with the target physical address range
1751 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1755 *
1756 * Called with mmap_lock held for user-mode emulation
1757 */
1758void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1759{
1760    struct page_collection *pages;
1761    PageDesc *p;
1762
1763    assert_memory_lock();
1764
1765    p = page_find(start >> TARGET_PAGE_BITS);
1766    if (p == NULL) {
1767        return;
1768    }
1769    pages = page_collection_lock(start, end);
1770    tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1771    page_collection_unlock(pages);
1772}
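/*
 * For illustration only (hypothetical caller, not part of this file): a
 * device model that knows a guest store of 'size' bytes hit RAM at
 * 'ram_addr', entirely within one page, could drop any overlapping
 * translations with
 *
 *     tb_invalidate_phys_page_range(ram_addr, ram_addr + size);
 *
 * Writes that may cross a page boundary should instead go through
 * tb_invalidate_phys_range() below, which splits the range per page.
 */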
1773
1774/*
1775 * Invalidate all TBs which intersect with the target physical address range
1776 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1780 *
1781 * Called with mmap_lock held for user-mode emulation.
1782 */
1783#ifdef CONFIG_SOFTMMU
1784void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
1785#else
1786void tb_invalidate_phys_range(target_ulong start, target_ulong end)
1787#endif
1788{
1789    struct page_collection *pages;
1790    tb_page_addr_t next;
1791
1792    assert_memory_lock();
1793
1794    pages = page_collection_lock(start, end);
1795    for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
1796         start < end;
1797         start = next, next += TARGET_PAGE_SIZE) {
1798        PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
1799        tb_page_addr_t bound = MIN(next, end);
1800
1801        if (pd == NULL) {
1802            continue;
1803        }
1804        tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
1805    }
1806    page_collection_unlock(pages);
1807}
1808
1809#ifdef CONFIG_SOFTMMU
1810/* len must be <= 8 and start must be a multiple of len.
1811 * Called from the softmmu slow path when code areas are written to, with
1812 * the iothread mutex not held.
1813 *
1814 * Call with all @pages in the range [@start, @start + len[ locked.
1815 */
1816void tb_invalidate_phys_page_fast(struct page_collection *pages,
1817                                  tb_page_addr_t start, int len,
1818                                  uintptr_t retaddr)
1819{
1820    PageDesc *p;
1821
1822    assert_memory_lock();
1823
1824    p = page_find(start >> TARGET_PAGE_BITS);
1825    if (!p) {
1826        return;
1827    }
1828
1829    assert_page_locked(p);
1830    if (!p->code_bitmap &&
1831        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
1832        build_page_bitmap(p);
1833    }
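    /*
     * Once built, the bitmap has one bit per byte of the page that is covered
     * by translated code, so a write that only touches untranslated bytes can
     * skip the invalidation below entirely.
     */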
1834    if (p->code_bitmap) {
1835        unsigned int nr;
1836        unsigned long b;
1837
1838        nr = start & ~TARGET_PAGE_MASK;
1839        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
1840        if (b & ((1 << len) - 1)) {
1841            goto do_invalidate;
1842        }
1843    } else {
1844    do_invalidate:
1845        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
1846                                              retaddr);
1847    }
1848}
1849#else
1850/* Called with mmap_lock held. If pc is not 0 then it indicates the
1851 * host PC of the faulting store instruction that caused this invalidate.
1852 * Returns true if the caller needs to abort execution of the current
1853 * TB (because it was modified by this store and the guest CPU has
1854 * precise-SMC semantics).
1855 */
1856static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
1857{
1858    TranslationBlock *tb;
1859    PageDesc *p;
1860    int n;
1861#ifdef TARGET_HAS_PRECISE_SMC
1862    TranslationBlock *current_tb = NULL;
1863    CPUState *cpu = current_cpu;
1864    CPUArchState *env = NULL;
1865    int current_tb_modified = 0;
1866    target_ulong current_pc = 0;
1867    target_ulong current_cs_base = 0;
1868    uint32_t current_flags = 0;
1869#endif
1870
1871    assert_memory_lock();
1872
1873    addr &= TARGET_PAGE_MASK;
1874    p = page_find(addr >> TARGET_PAGE_BITS);
1875    if (!p) {
1876        return false;
1877    }
1878
1879#ifdef TARGET_HAS_PRECISE_SMC
1880    if (p->first_tb && pc != 0) {
1881        current_tb = tcg_tb_lookup(pc);
1882    }
1883    if (cpu != NULL) {
1884        env = cpu->env_ptr;
1885    }
1886#endif
1887    assert_page_locked(p);
1888    PAGE_FOR_EACH_TB(p, tb, n) {
1889#ifdef TARGET_HAS_PRECISE_SMC
1890        if (current_tb == tb &&
1891            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1892            /* If we are modifying the current TB, we must stop
1893               its execution. We could be more precise by checking
1894               that the modification is after the current PC, but it
1895               would require a specialized function to partially
1896               restore the CPU state. */
1897
1898            current_tb_modified = 1;
1899            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
1900            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1901                                 &current_flags);
1902        }
1903#endif /* TARGET_HAS_PRECISE_SMC */
1904        tb_phys_invalidate(tb, addr);
1905    }
1906    p->first_tb = (uintptr_t)NULL;
1907#ifdef TARGET_HAS_PRECISE_SMC
1908    if (current_tb_modified) {
1909        /* Force execution of one insn next time.  */
1910        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1911        return true;
1912    }
1913#endif
1914
1915    return false;
1916}
1917#endif
1918
1919/* user-mode: call with mmap_lock held */
1920void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1921{
1922    TranslationBlock *tb;
1923
1924    assert_memory_lock();
1925
1926    tb = tcg_tb_lookup(retaddr);
1927    if (tb) {
1928        /* We can use retranslation to find the PC.  */
1929        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1930        tb_phys_invalidate(tb, -1);
1931    } else {
1932        /* The exception probably happened in a helper.  The CPU state should
1933           have been saved before calling it. Fetch the PC from there.  */
1934        CPUArchState *env = cpu->env_ptr;
1935        target_ulong pc, cs_base;
1936        tb_page_addr_t addr;
1937        uint32_t flags;
1938
1939        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1940        addr = get_page_addr_code(env, pc);
1941        if (addr != -1) {
1942            tb_invalidate_phys_range(addr, addr + 1);
1943        }
1944    }
1945}
1946
1947#ifndef CONFIG_USER_ONLY
1948/*
1949 * In deterministic execution mode, instructions doing device I/Os
1950 * must be at the end of the TB.
1951 *
1952 * Called from the softmmu I/O access path, with the iothread mutex not held.
1953 */
1954void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1955{
1956    TranslationBlock *tb;
1957    CPUClass *cc;
1958    uint32_t n;
1959
1960    tb = tcg_tb_lookup(retaddr);
1961    if (!tb) {
1962        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1963                  (void *)retaddr);
1964    }
1965    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
1966
1967    /*
1968     * Some guests must re-execute the branch when re-executing a delay
1969     * slot instruction.  When this is the case, adjust icount and N
1970     * to account for the re-execution of the branch.
1971     */
1972    n = 1;
1973    cc = CPU_GET_CLASS(cpu);
1974    if (cc->tcg_ops->io_recompile_replay_branch &&
1975        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1976        cpu_neg(cpu)->icount_decr.u16.low++;
1977        n = 2;
1978    }
1979
1980    /*
1981     * Exit the loop and potentially generate a new TB executing just
1982     * the I/O insns. We also limit instrumentation to memory
1983     * operations only (which execute after completion) so we don't
1984     * double instrument the instruction.
1985     */
1986    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1987
1988    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
1989                           "cpu_io_recompile: rewound execution of TB to "
1990                           TARGET_FMT_lx "\n", tb->pc);
1991
1992    cpu_loop_exit_noexc(cpu);
1993}
1994
1995static void print_qht_statistics(struct qht_stats hst, GString *buf)
1996{
1997    uint32_t hgram_opts;
1998    size_t hgram_bins;
1999    char *hgram;
2000
2001    if (!hst.head_buckets) {
2002        return;
2003    }
2004    g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
2005                           "(%0.2f%% head buckets used)\n",
2006                           hst.used_head_buckets, hst.head_buckets,
2007                           (double)hst.used_head_buckets /
2008                           hst.head_buckets * 100);
2009
2010    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2011    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2012    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2013        hgram_opts |= QDIST_PR_NODECIMAL;
2014    }
2015    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2016    g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
2017                           "Histogram: %s\n",
2018                           qdist_avg(&hst.occupancy) * 100, hgram);
2019    g_free(hgram);
2020
2021    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2022    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2023    if (hgram_bins > 10) {
2024        hgram_bins = 10;
2025    } else {
2026        hgram_bins = 0;
2027        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2028    }
2029    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2030    g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
2031                           "Histogram: %s\n",
2032                           qdist_avg(&hst.chain), hgram);
2033    g_free(hgram);
2034}
2035
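/*
 * Accumulator for the per-TB statistics gathered by tb_tree_stats_iter():
 * TB count, total host and guest code sizes, the largest guest block seen,
 * how many TBs were generated with one or two direct-jump (goto_tb) slots,
 * and how many span two guest pages.
 */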
2036struct tb_tree_stats {
2037    size_t nb_tbs;
2038    size_t host_size;
2039    size_t target_size;
2040    size_t max_target_size;
2041    size_t direct_jmp_count;
2042    size_t direct_jmp2_count;
2043    size_t cross_page;
2044};
2045
2046static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2047{
2048    const TranslationBlock *tb = value;
2049    struct tb_tree_stats *tst = data;
2050
2051    tst->nb_tbs++;
2052    tst->host_size += tb->tc.size;
2053    tst->target_size += tb->size;
2054    if (tb->size > tst->max_target_size) {
2055        tst->max_target_size = tb->size;
2056    }
2057    if (tb->page_addr[1] != -1) {
2058        tst->cross_page++;
2059    }
2060    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2061        tst->direct_jmp_count++;
2062        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2063            tst->direct_jmp2_count++;
2064        }
2065    }
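    /* Keep iterating: a TRUE return would stop the tree traversal early. */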
2066    return false;
2067}
2068
2069void dump_exec_info(GString *buf)
2070{
2071    struct tb_tree_stats tst = {};
2072    struct qht_stats hst;
2073    size_t nb_tbs, flush_full, flush_part, flush_elide;
2074
2075    tcg_tb_foreach(tb_tree_stats_iter, &tst);
2076    nb_tbs = tst.nb_tbs;
2077    /* XXX: avoid using doubles ? */
2078    g_string_append_printf(buf, "Translation buffer state:\n");
2079    /*
2080     * Report total code size including the padding and TB structs;
2081     * otherwise users might think "-accel tcg,tb-size" is not honoured.
2082     * For avg host size we use the precise numbers from tb_tree_stats though.
2083     */
2084    g_string_append_printf(buf, "gen code size       %zu/%zu\n",
2085                           tcg_code_size(), tcg_code_capacity());
2086    g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
2087    g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
2088                           nb_tbs ? tst.target_size / nb_tbs : 0,
2089                           tst.max_target_size);
2090    g_string_append_printf(buf, "TB avg host size    %zu bytes "
2091                           "(expansion ratio: %0.1f)\n",
2092                           nb_tbs ? tst.host_size / nb_tbs : 0,
2093                           tst.target_size ?
2094                           (double)tst.host_size / tst.target_size : 0);
2095    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
2096                           tst.cross_page,
2097                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2098    g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
2099                           "(2 jumps=%zu %zu%%)\n",
2100                           tst.direct_jmp_count,
2101                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2102                           tst.direct_jmp2_count,
2103                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2104
2105    qht_statistics_init(&tb_ctx.htable, &hst);
2106    print_qht_statistics(hst, buf);
2107    qht_statistics_destroy(&hst);
2108
2109    g_string_append_printf(buf, "\nStatistics:\n");
2110    g_string_append_printf(buf, "TB flush count      %u\n",
2111                           qatomic_read(&tb_ctx.tb_flush_count));
2112    g_string_append_printf(buf, "TB invalidate count %u\n",
2113                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));
2114
2115    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2116    g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
2117    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
2118    g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
2119    tcg_dump_info(buf);
2120}
2121
2122void dump_opcount_info(GString *buf)
2123{
2124    tcg_dump_op_count(buf);
2125}
2126
2127#else /* CONFIG_USER_ONLY */
2128
2129void cpu_interrupt(CPUState *cpu, int mask)
2130{
2131    g_assert(qemu_mutex_iothread_locked());
2132    cpu->interrupt_request |= mask;
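    /*
     * Writing -1 to the high half makes the 32-bit icount_decr counter
     * negative, which the check at the start of every generated TB notices,
     * so the vCPU returns to the main loop to service the interrupt.
     */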
2133    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2134}
2135
2136/*
2137 * Walks guest process memory "regions" one by one
2138 * and calls callback function 'fn' for each region.
2139 */
2140struct walk_memory_regions_data {
2141    walk_memory_regions_fn fn;
2142    void *priv;
2143    target_ulong start;
2144    int prot;
2145};
2146
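/*
 * Flush the region accumulated so far (if any) to the callback, then start
 * accumulating a new one at 'end' if the new protection is non-zero.
 */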
2147static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2148                                   target_ulong end, int new_prot)
2149{
2150    if (data->start != -1u) {
2151        int rc = data->fn(data->priv, data->start, end, data->prot);
2152        if (rc != 0) {
2153            return rc;
2154        }
2155    }
2156
2157    data->start = (new_prot ? end : -1u);
2158    data->prot = new_prot;
2159
2160    return 0;
2161}
2162
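/*
 * Recurse over one level of the l1_map radix table: at level 0 the entries
 * are PageDesc arrays whose flags are compared page by page, while higher
 * levels descend into each of the V_L2_SIZE child tables in turn.
 */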
2163static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2164                                 target_ulong base, int level, void **lp)
2165{
2166    target_ulong pa;
2167    int i, rc;
2168
2169    if (*lp == NULL) {
2170        return walk_memory_regions_end(data, base, 0);
2171    }
2172
2173    if (level == 0) {
2174        PageDesc *pd = *lp;
2175
2176        for (i = 0; i < V_L2_SIZE; ++i) {
2177            int prot = pd[i].flags;
2178
2179            pa = base | (i << TARGET_PAGE_BITS);
2180            if (prot != data->prot) {
2181                rc = walk_memory_regions_end(data, pa, prot);
2182                if (rc != 0) {
2183                    return rc;
2184                }
2185            }
2186        }
2187    } else {
2188        void **pp = *lp;
2189
2190        for (i = 0; i < V_L2_SIZE; ++i) {
2191            pa = base | ((target_ulong)i <<
2192                (TARGET_PAGE_BITS + V_L2_BITS * level));
2193            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2194            if (rc != 0) {
2195                return rc;
2196            }
2197        }
2198    }
2199
2200    return 0;
2201}
2202
2203int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2204{
2205    struct walk_memory_regions_data data;
2206    uintptr_t i, l1_sz = v_l1_size;
2207
2208    data.fn = fn;
2209    data.priv = priv;
2210    data.start = -1u;
2211    data.prot = 0;
2212
2213    for (i = 0; i < l1_sz; i++) {
2214        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2215        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2216        if (rc != 0) {
2217            return rc;
2218        }
2219    }
2220
2221    return walk_memory_regions_end(&data, 0, 0);
2222}
2223
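/*
 * dump_region() below is the callback used by page_dump(); for illustration,
 * a hypothetical callback that merely counts executable regions could look
 * like this (not part of this file):
 *
 *     static int count_exec(void *priv, target_ulong start,
 *                           target_ulong end, unsigned long prot)
 *     {
 *         if (prot & PAGE_EXEC) {
 *             (*(size_t *)priv)++;
 *         }
 *         return 0;    // a non-zero return would abort the walk
 *     }
 *
 *     size_t n = 0;
 *     walk_memory_regions(&n, count_exec);
 */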
2224static int dump_region(void *priv, target_ulong start,
2225    target_ulong end, unsigned long prot)
2226{
2227    FILE *f = (FILE *)priv;
2228
2229    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2230        " "TARGET_FMT_lx" %c%c%c\n",
2231        start, end, end - start,
2232        ((prot & PAGE_READ) ? 'r' : '-'),
2233        ((prot & PAGE_WRITE) ? 'w' : '-'),
2234        ((prot & PAGE_EXEC) ? 'x' : '-'));
2235
2236    return 0;
2237}
2238
2239/* dump memory mappings */
2240void page_dump(FILE *f)
2241{
2242    const int length = sizeof(target_ulong) * 2;
2243    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2244            length, "start", length, "end", length, "size", "prot");
2245    walk_memory_regions(f, dump_region);
2246}
2247
2248int page_get_flags(target_ulong address)
2249{
2250    PageDesc *p;
2251
2252    p = page_find(address >> TARGET_PAGE_BITS);
2253    if (!p) {
2254        return 0;
2255    }
2256    return p->flags;
2257}
2258
2259/* Modify the flags of a page and invalidate the code if necessary.
2260   The flag PAGE_WRITE_ORG is set automatically based on PAGE_WRITE.
2261   The mmap_lock should already be held.  */
2262void page_set_flags(target_ulong start, target_ulong end, int flags)
2263{
2264    target_ulong addr, len;
2265    bool reset_target_data;
2266
2267    /* This function should never be called with addresses outside the
2268       guest address space.  If this assert fires, it probably indicates
2269       a missing call to h2g_valid.  */
2270    assert(end - 1 <= GUEST_ADDR_MAX);
2271    assert(start < end);
2272    /* Only set PAGE_ANON with new mappings. */
2273    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
2274    assert_memory_lock();
2275
2276    start = start & TARGET_PAGE_MASK;
2277    end = TARGET_PAGE_ALIGN(end);
2278
2279    if (flags & PAGE_WRITE) {
2280        flags |= PAGE_WRITE_ORG;
2281    }
2282    reset_target_data = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
2283    flags &= ~PAGE_RESET;
2284
2285    for (addr = start, len = end - start;
2286         len != 0;
2287         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2288        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2289
2290        /* If a page holding translated code is being made writable
2291           again, invalidate the code inside.  */
2292        if (!(p->flags & PAGE_WRITE) &&
2293            (flags & PAGE_WRITE) &&
2294            p->first_tb) {
2295            tb_invalidate_phys_page(addr, 0);
2296        }
2297        if (reset_target_data) {
2298            g_free(p->target_data);
2299            p->target_data = NULL;
2300            p->flags = flags;
2301        } else {
2302            /* Using mprotect on a page does not change MAP_ANON. */
2303            p->flags = (p->flags & PAGE_ANON) | flags;
2304        }
2305    }
2306}
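/*
 * For illustration only (hypothetical values, not taken from a real caller):
 * marking a freshly created anonymous read/write mapping might look like
 *
 *     page_set_flags(start, start + len,
 *                    PAGE_READ | PAGE_WRITE | PAGE_VALID |
 *                    PAGE_ANON | PAGE_RESET);
 *
 * PAGE_RESET drops any existing per-page target_data, and PAGE_WRITE_ORG is
 * added automatically because PAGE_WRITE is set.
 */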
2307
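/*
 * target_data is an opaque, per-page allocation that the target front end
 * can attach to a guest page (for example to hold memory tags); it is freed
 * again whenever the page is re-mapped with PAGE_RESET in page_set_flags().
 */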
2308void *page_get_target_data(target_ulong address)
2309{
2310    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2311    return p ? p->target_data : NULL;
2312}
2313
2314void *page_alloc_target_data(target_ulong address, size_t size)
2315{
2316    PageDesc *p = page_find(address >> TARGET_PAGE_BITS);
2317    void *ret = NULL;
2318
2319    if (p->flags & PAGE_VALID) {
2320        ret = p->target_data;
2321        if (!ret) {
2322            p->target_data = ret = g_malloc0(size);
2323        }
2324    }
2325    return ret;
2326}
2327
2328int page_check_range(target_ulong start, target_ulong len, int flags)
2329{
2330    PageDesc *p;
2331    target_ulong end;
2332    target_ulong addr;
2333
2334    /* This function should never be called with addresses outside the
2335       guest address space.  If this assert fires, it probably indicates
2336       a missing call to h2g_valid.  */
2337    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2338        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2339    }
2340
2341    if (len == 0) {
2342        return 0;
2343    }
2344    if (start + len - 1 < start) {
2345        /* We've wrapped around.  */
2346        return -1;
2347    }
2348
2349    /* must do before we lose bits in the next step */
2350    end = TARGET_PAGE_ALIGN(start + len);
2351    start = start & TARGET_PAGE_MASK;
2352
2353    for (addr = start, len = end - start;
2354         len != 0;
2355         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2356        p = page_find(addr >> TARGET_PAGE_BITS);
2357        if (!p) {
2358            return -1;
2359        }
2360        if (!(p->flags & PAGE_VALID)) {
2361            return -1;
2362        }
2363
2364        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2365            return -1;
2366        }
2367        if (flags & PAGE_WRITE) {
2368            if (!(p->flags & PAGE_WRITE_ORG)) {
2369                return -1;
2370            }
2371            /* unprotect the page if it was made read-only because it
2372               contains translated code */
2373            if (!(p->flags & PAGE_WRITE)) {
2374                if (!page_unprotect(addr, 0)) {
2375                    return -1;
2376                }
2377            }
2378        }
2379    }
2380    return 0;
2381}
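/*
 * For illustration only (hypothetical caller): syscall emulation code that
 * needs to read 'len' guest bytes at 'addr' could check access first with
 *
 *     if (page_check_range(addr, len, PAGE_READ) < 0) {
 *         return -TARGET_EFAULT;
 *     }
 *
 * Asking for PAGE_WRITE additionally unprotects pages that were made
 * read-only only because they contain translated code.
 */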
2382
2383void page_protect(tb_page_addr_t page_addr)
2384{
2385    target_ulong addr;
2386    PageDesc *p;
2387    int prot;
2388
2389    p = page_find(page_addr >> TARGET_PAGE_BITS);
2390    if (p && (p->flags & PAGE_WRITE)) {
2391        /*
2392         * Force the host page to be non-writable (writes will take a page
2393         * fault plus the mprotect overhead).
2394         */
2395        page_addr &= qemu_host_page_mask;
2396        prot = 0;
2397        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
2398             addr += TARGET_PAGE_SIZE) {
2399
2400            p = page_find(addr >> TARGET_PAGE_BITS);
2401            if (!p) {
2402                continue;
2403            }
2404            prot |= p->flags;
2405            p->flags &= ~PAGE_WRITE;
2406        }
2407        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
2408                 (prot & PAGE_BITS) & ~PAGE_WRITE);
2409        if (DEBUG_TB_INVALIDATE_GATE) {
2410            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
2411        }
2412    }
2413}
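/*
 * Write-protecting the host pages that back translated code is how user-mode
 * emulation catches self-modifying code: a later guest write faults, and the
 * SIGSEGV handler ends up in page_unprotect() below, which invalidates the
 * affected TBs and restores PAGE_WRITE.
 */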
2414
2415/* called from signal handler: invalidate the code and unprotect the
2416 * page. Return 0 if the fault was not handled, 1 if it was handled,
2417 * and 2 if it was handled but the caller must cause the TB to be
2418 * immediately exited. (We can only return 2 if the 'pc' argument is
2419 * non-zero.)
2420 */
2421int page_unprotect(target_ulong address, uintptr_t pc)
2422{
2423    unsigned int prot;
2424    bool current_tb_invalidated;
2425    PageDesc *p;
2426    target_ulong host_start, host_end, addr;
2427
2428    /* Technically this isn't safe inside a signal handler.  However we
2429       know this only ever happens in a synchronous SEGV handler, so in
2430       practice it seems to be ok.  */
2431    mmap_lock();
2432
2433    p = page_find(address >> TARGET_PAGE_BITS);
2434    if (!p) {
2435        mmap_unlock();
2436        return 0;
2437    }
2438
2439    /* if the page was really writable, then we change its
2440       protection back to writable */
2441    if (p->flags & PAGE_WRITE_ORG) {
2442        current_tb_invalidated = false;
2443        if (p->flags & PAGE_WRITE) {
2444            /* If the page is actually marked WRITE then assume this is because
2445             * this thread raced with another one which got here first and
2446             * set the page to PAGE_WRITE and did the TB invalidate for us.
2447             */
2448#ifdef TARGET_HAS_PRECISE_SMC
2449            TranslationBlock *current_tb = tcg_tb_lookup(pc);
2450            if (current_tb) {
2451                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2452            }
2453#endif
2454        } else {
2455            host_start = address & qemu_host_page_mask;
2456            host_end = host_start + qemu_host_page_size;
2457
2458            prot = 0;
2459            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2460                p = page_find(addr >> TARGET_PAGE_BITS);
2461                p->flags |= PAGE_WRITE;
2462                prot |= p->flags;
2463
2464                /* and since the content will be modified, we must invalidate
2465                   the corresponding translated code. */
2466                current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2467#ifdef CONFIG_USER_ONLY
2468                if (DEBUG_TB_CHECK_GATE) {
2469                    tb_invalidate_check(addr);
2470                }
2471#endif
2472            }
2473            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
2474                     prot & PAGE_BITS);
2475        }
2476        mmap_unlock();
2477        /* If current TB was invalidated return to main loop */
2478        return current_tb_invalidated ? 2 : 1;
2479    }
2480    mmap_unlock();
2481    return 0;
2482}
2483#endif /* CONFIG_USER_ONLY */
2484
2485/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2486void tcg_flush_softmmu_tlb(CPUState *cs)
2487{
2488#ifdef CONFIG_SOFTMMU
2489    tlb_flush(cs);
2490#endif
2491}
2492