qemu/accel/tcg/translate-all.c
   1/*
   2 *  Host code generation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu-common.h"
  22
  23#define NO_CPU_IO_DEFS
  24#include "cpu.h"
  25#include "trace.h"
  26#include "disas/disas.h"
  27#include "exec/exec-all.h"
  28#include "tcg.h"
  29#if defined(CONFIG_USER_ONLY)
  30#include "qemu.h"
  31#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  32#include <sys/param.h>
  33#if __FreeBSD_version >= 700104
  34#define HAVE_KINFO_GETVMMAP
  35#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  36#include <sys/proc.h>
  37#include <machine/profile.h>
  38#define _KERNEL
  39#include <sys/user.h>
  40#undef _KERNEL
  41#undef sigqueue
  42#include <libutil.h>
  43#endif
  44#endif
  45#else
  46#include "exec/ram_addr.h"
  47#endif
  48
  49#include "exec/cputlb.h"
  50#include "exec/tb-hash.h"
  51#include "translate-all.h"
  52#include "qemu/bitmap.h"
  53#include "qemu/error-report.h"
  54#include "qemu/qemu-print.h"
  55#include "qemu/timer.h"
  56#include "qemu/main-loop.h"
  57#include "exec/log.h"
  58#include "sysemu/cpus.h"
  59#include "sysemu/tcg.h"
  60
  61/* #define DEBUG_TB_INVALIDATE */
  62/* #define DEBUG_TB_FLUSH */
  63/* make various TB consistency checks */
  64/* #define DEBUG_TB_CHECK */
  65
  66#ifdef DEBUG_TB_INVALIDATE
  67#define DEBUG_TB_INVALIDATE_GATE 1
  68#else
  69#define DEBUG_TB_INVALIDATE_GATE 0
  70#endif
  71
  72#ifdef DEBUG_TB_FLUSH
  73#define DEBUG_TB_FLUSH_GATE 1
  74#else
  75#define DEBUG_TB_FLUSH_GATE 0
  76#endif
  77
  78#if !defined(CONFIG_USER_ONLY)
  79/* TB consistency checks only implemented for usermode emulation.  */
  80#undef DEBUG_TB_CHECK
  81#endif
  82
  83#ifdef DEBUG_TB_CHECK
  84#define DEBUG_TB_CHECK_GATE 1
  85#else
  86#define DEBUG_TB_CHECK_GATE 0
  87#endif
  88
  89/* Accesses to the various translation structures need to be serialised
  90 * via locks for consistency.
  91 * In user-mode emulation, accesses to the memory-related structures are
  92 * protected by mmap_lock.
  93 * In !user-mode we use per-page locks.
  94 */
  95#ifdef CONFIG_SOFTMMU
  96#define assert_memory_lock()
  97#else
  98#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
  99#endif
 100
 101#define SMC_BITMAP_USE_THRESHOLD 10
 102
 103typedef struct PageDesc {
 104    /* list of TBs intersecting this ram page */
 105    uintptr_t first_tb;
 106#ifdef CONFIG_SOFTMMU
 107    /* to optimize the handling of self-modifying code, we count the
 108       lookups we do into a given page before switching to a bitmap */
 109    unsigned long *code_bitmap;
 110    unsigned int code_write_count;
 111#else
 112    unsigned long flags;
 113#endif
 114#ifndef CONFIG_USER_ONLY
 115    QemuSpin lock;
 116#endif
 117} PageDesc;
 118
 119/**
 120 * struct page_entry - page descriptor entry
 121 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 122 * @index:  page index of the page
 123 * @locked: whether the page is locked
 124 *
 125 * This struct helps us keep track of the locked state of a page, without
 126 * bloating &struct PageDesc.
 127 *
 128 * A page lock protects accesses to all fields of &struct PageDesc.
 129 *
 130 * See also: &struct page_collection.
 131 */
 132struct page_entry {
 133    PageDesc *pd;
 134    tb_page_addr_t index;
 135    bool locked;
 136};
 137
 138/**
 139 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 140 * @tree:   Binary search tree (BST) of the pages, with key == page index
 141 * @max:    Pointer to the page in @tree with the highest page index
 142 *
 143 * To avoid deadlock we lock pages in ascending order of page index.
 144 * When operating on a set of pages, we need to keep track of them so that
 145 * we can lock them in order and also unlock them later. For this we collect
 146 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 147 * @tree implementation we use does not provide an O(1) operation to obtain the
 148 * highest-ranked element, we use @max to keep track of the inserted page
 149 * with the highest index. This is valuable because if a page is not in
 150 * the tree and its index is higher than @max's, then we can lock it
 151 * without breaking the locking order rule.
 152 *
 153 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 154 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 155 *
 156 * See also: page_collection_lock().
 157 */
 158struct page_collection {
 159    GTree *tree;
 160    struct page_entry *max;
 161};
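
/*
 * Illustrative usage sketch (not upstream code): callers later in this file
 * use the pair page_collection_lock()/page_collection_unlock() around bulk
 * TB invalidation, roughly as:
 *
 *     struct page_collection *pages;
 *
 *     pages = page_collection_lock(start, end);
 *     ... walk and modify the TB lists of every page in [start, end) ...
 *     page_collection_unlock(pages);
 *
 * All page locks are taken in ascending page-index order and are released
 * through page_entry_destroy() when the tree is destroyed.
 */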
 162
 163/* list iterators for lists of tagged pointers in TranslationBlock */
 164#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
 165    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
 166         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
 167             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
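
/*
 * Worked example of the tagged encoding (illustrative): a TB whose first
 * page is A and whose second page is B is linked with
 *     A->first_tb = (uintptr_t)tb | 0;
 *     B->first_tb = (uintptr_t)tb | 1;
 * and further TBs are chained through tb->page_next[0]/[1] with the same
 * low-bit tag.  The iterator masks the tag off to recover the pointer and
 * uses it to pick which 'field' element to follow next.
 */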
 168
 169#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
 170    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
 171
 172#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
 173    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 174
 175/* In system mode we want L1_MAP to be based on ram offsets,
 176   while in user mode we want it to be based on virtual addresses.  */
 177#if !defined(CONFIG_USER_ONLY)
 178#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 179# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 180#else
 181# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 182#endif
 183#else
 184# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
 185#endif
 186
 187/* Size of the L2 (and L3, etc) page tables.  */
 188#define V_L2_BITS 10
 189#define V_L2_SIZE (1 << V_L2_BITS)
 190
 191/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
 192QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
 193                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
 194                  * BITS_PER_BYTE);
 195
 196/*
 197 * L1 Mapping properties
 198 */
 199static int v_l1_size;
 200static int v_l1_shift;
 201static int v_l2_levels;
 202
 203/* The bottom level has pointers to PageDesc, and is indexed by
 204 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 205 */
 206#define V_L1_MIN_BITS 4
 207#define V_L1_MAX_BITS (V_L2_BITS + 3)
 208#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
 209
 210static void *l1_map[V_L1_MAX_SIZE];
 211
 212/* code generation context */
 213TCGContext tcg_init_ctx;
 214__thread TCGContext *tcg_ctx;
 215TBContext tb_ctx;
 216bool parallel_cpus;
 217
 218static void page_table_config_init(void)
 219{
 220    uint32_t v_l1_bits;
 221
 222    assert(TARGET_PAGE_BITS);
 223    /* The bits remaining after N lower levels of page tables.  */
 224    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
 225    if (v_l1_bits < V_L1_MIN_BITS) {
 226        v_l1_bits += V_L2_BITS;
 227    }
 228
 229    v_l1_size = 1 << v_l1_bits;
 230    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
 231    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
 232
 233    assert(v_l1_bits <= V_L1_MAX_BITS);
 234    assert(v_l1_shift % V_L2_BITS == 0);
 235    assert(v_l2_levels >= 0);
 236}
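
/*
 * Worked example (illustrative): for a 32-bit guest address space with
 * 4 KiB pages (L1_MAP_ADDR_SPACE_BITS == 32, TARGET_PAGE_BITS == 12),
 * 20 bits of page index remain.  20 % V_L2_BITS == 0, which is below
 * V_L1_MIN_BITS, so v_l1_bits becomes 10: v_l1_size == 1024,
 * v_l1_shift == 10 and v_l2_levels == 0, i.e. the top 10 bits of the page
 * index select the L1 entry and the low 10 bits index the leaf PageDesc
 * array directly.
 */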
 237
 238void cpu_gen_init(void)
 239{
 240    tcg_context_init(&tcg_init_ctx);
 241}
 242
 243/* Encode VAL as a signed leb128 sequence at P.
 244   Return P incremented past the encoded value.  */
 245static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 246{
 247    int more, byte;
 248
 249    do {
 250        byte = val & 0x7f;
 251        val >>= 7;
 252        more = !((val == 0 && (byte & 0x40) == 0)
 253                 || (val == -1 && (byte & 0x40) != 0));
 254        if (more) {
 255            byte |= 0x80;
 256        }
 257        *p++ = byte;
 258    } while (more);
 259
 260    return p;
 261}
 262
 263/* Decode a signed leb128 sequence at *PP; increment *PP past the
 264   decoded value.  Return the decoded value.  */
 265static target_long decode_sleb128(uint8_t **pp)
 266{
 267    uint8_t *p = *pp;
 268    target_long val = 0;
 269    int byte, shift = 0;
 270
 271    do {
 272        byte = *p++;
 273        val |= (target_ulong)(byte & 0x7f) << shift;
 274        shift += 7;
 275    } while (byte & 0x80);
 276    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
 277        val |= -(target_ulong)1 << shift;
 278    }
 279
 280    *pp = p;
 281    return val;
 282}
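
/*
 * Example encodings (illustrative, checked by hand against the routines
 * above): 300 (0b100101100) encodes as the two bytes 0xac 0x02, while -2
 * encodes as the single byte 0x7e; decode_sleb128() sign-extends the
 * latter because bit 6 of the final byte is set.
 */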
 283
 284/* Encode the data collected about the instructions while compiling TB.
 285   Place the data at BLOCK, and return the number of bytes consumed.
 286
 287   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
 288   which come from the target's insn_start data, followed by a uintptr_t
 289   which comes from the host pc of the end of the code implementing the insn.
 290
 291   Each line of the table is encoded as sleb128 deltas from the previous
 292   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
 293   That is, the first column is seeded with the guest pc, the last column
 294   with the host pc, and the middle columns with zeros.  */
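
/*
 * Purely illustrative example: for a two-insn TB at guest pc 0x1000 whose
 * insns are 4 guest bytes long and whose generated code for insn 0 and
 * insn 1 ends at host offsets 0x20 and 0x38, the rows encoded below are
 * the sleb128 deltas {0, 0..., 0x20} and {4, 0..., 0x18}.
 */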
 295
 296static int encode_search(TranslationBlock *tb, uint8_t *block)
 297{
 298    uint8_t *highwater = tcg_ctx->code_gen_highwater;
 299    uint8_t *p = block;
 300    int i, j, n;
 301
 302    for (i = 0, n = tb->icount; i < n; ++i) {
 303        target_ulong prev;
 304
 305        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 306            if (i == 0) {
 307                prev = (j == 0 ? tb->pc : 0);
 308            } else {
 309                prev = tcg_ctx->gen_insn_data[i - 1][j];
 310            }
 311            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
 312        }
 313        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
 314        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
 315
 316        /* Test for (pending) buffer overflow.  The assumption is that any
 317           one row beginning below the high water mark cannot overrun
 318           the buffer completely.  Thus we can test for overflow after
 319           encoding a row without having to check during encoding.  */
 320        if (unlikely(p > highwater)) {
 321            return -1;
 322        }
 323    }
 324
 325    return p - block;
 326}
 327
 328/* The cpu state corresponding to 'searched_pc' is restored.
 329 * When reset_icount is true, the current TB will be interrupted and
 330 * icount should be recalculated.
 331 */
 332static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 333                                     uintptr_t searched_pc, bool reset_icount)
 334{
 335    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
 336    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
 337    CPUArchState *env = cpu->env_ptr;
 338    uint8_t *p = tb->tc.ptr + tb->tc.size;
 339    int i, j, num_insns = tb->icount;
 340#ifdef CONFIG_PROFILER
 341    TCGProfile *prof = &tcg_ctx->prof;
 342    int64_t ti = profile_getclock();
 343#endif
 344
 345    searched_pc -= GETPC_ADJ;
 346
 347    if (searched_pc < host_pc) {
 348        return -1;
 349    }
 350
 351    /* Reconstruct the stored insn data while looking for the point at
 352       which the end of the insn exceeds the searched_pc.  */
 353    for (i = 0; i < num_insns; ++i) {
 354        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 355            data[j] += decode_sleb128(&p);
 356        }
 357        host_pc += decode_sleb128(&p);
 358        if (host_pc > searched_pc) {
 359            goto found;
 360        }
 361    }
 362    return -1;
 363
 364 found:
 365    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
 366        assert(use_icount);
 367        /* Reset the cycle counter to the start of the block and shift
 368           it by the number of instructions actually executed */
 369        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
 370    }
 371    restore_state_to_opc(env, tb, data);
 372
 373#ifdef CONFIG_PROFILER
 374    atomic_set(&prof->restore_time,
 375                prof->restore_time + profile_getclock() - ti);
 376    atomic_set(&prof->restore_count, prof->restore_count + 1);
 377#endif
 378    return 0;
 379}
 380
 381bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
 382{
 383    TranslationBlock *tb;
 384    bool r = false;
 385    uintptr_t check_offset;
 386
 387    /* The host_pc has to be in the region of the current code buffer.
 388     * If it is not, we will not be able to resolve it here. The two
 389     * cases where host_pc will not be correct are:
 390     *
 391     *  - fault during translation (instruction fetch)
 392     *  - fault from helper (not using GETPC() macro)
 393     *
 394     * Either way we need to return early as we can't resolve it here.
 395     *
 396     * We are using unsigned arithmetic, so if host_pc <
 397     * tcg_init_ctx.code_gen_buffer, check_offset will wrap to well
 398     * above code_gen_buffer_size.
 399     */
 400    check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer;
 401
 402    if (check_offset < tcg_init_ctx.code_gen_buffer_size) {
 403        tb = tcg_tb_lookup(host_pc);
 404        if (tb) {
 405            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
 406            if (tb_cflags(tb) & CF_NOCACHE) {
 407                /* one-shot translation, invalidate it immediately */
 408                tb_phys_invalidate(tb, -1);
 409                tcg_tb_remove(tb);
 410            }
 411            r = true;
 412        }
 413    }
 414
 415    return r;
 416}
 417
 418static void page_init(void)
 419{
 420    page_size_init();
 421    page_table_config_init();
 422
 423#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 424    {
 425#ifdef HAVE_KINFO_GETVMMAP
 426        struct kinfo_vmentry *freep;
 427        int i, cnt;
 428
 429        freep = kinfo_getvmmap(getpid(), &cnt);
 430        if (freep) {
 431            mmap_lock();
 432            for (i = 0; i < cnt; i++) {
 433                unsigned long startaddr, endaddr;
 434
 435                startaddr = freep[i].kve_start;
 436                endaddr = freep[i].kve_end;
 437                if (h2g_valid(startaddr)) {
 438                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 439
 440                    if (h2g_valid(endaddr)) {
 441                        endaddr = h2g(endaddr);
 442                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 443                    } else {
 444#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 445                        endaddr = ~0ul;
 446                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 447#endif
 448                    }
 449                }
 450            }
 451            free(freep);
 452            mmap_unlock();
 453        }
 454#else
 455        FILE *f;
 456
 457        last_brk = (unsigned long)sbrk(0);
 458
 459        f = fopen("/compat/linux/proc/self/maps", "r");
 460        if (f) {
 461            mmap_lock();
 462
 463            do {
 464                unsigned long startaddr, endaddr;
 465                int n;
 466
 467                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 468
 469                if (n == 2 && h2g_valid(startaddr)) {
 470                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 471
 472                    if (h2g_valid(endaddr)) {
 473                        endaddr = h2g(endaddr);
 474                    } else {
 475                        endaddr = ~0ul;
 476                    }
 477                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 478                }
 479            } while (!feof(f));
 480
 481            fclose(f);
 482            mmap_unlock();
 483        }
 484#endif
 485    }
 486#endif
 487}
 488
 489static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 490{
 491    PageDesc *pd;
 492    void **lp;
 493    int i;
 494
 495    /* Level 1.  Always allocated.  */
 496    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
 497
 498    /* Level 2..N-1.  */
 499    for (i = v_l2_levels; i > 0; i--) {
 500        void **p = atomic_rcu_read(lp);
 501
 502        if (p == NULL) {
 503            void *existing;
 504
 505            if (!alloc) {
 506                return NULL;
 507            }
 508            p = g_new0(void *, V_L2_SIZE);
 509            existing = atomic_cmpxchg(lp, NULL, p);
 510            if (unlikely(existing)) {
 511                g_free(p);
 512                p = existing;
 513            }
 514        }
 515
 516        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
 517    }
 518
 519    pd = atomic_rcu_read(lp);
 520    if (pd == NULL) {
 521        void *existing;
 522
 523        if (!alloc) {
 524            return NULL;
 525        }
 526        pd = g_new0(PageDesc, V_L2_SIZE);
 527#ifndef CONFIG_USER_ONLY
 528        {
 529            int i;
 530
 531            for (i = 0; i < V_L2_SIZE; i++) {
 532                qemu_spin_init(&pd[i].lock);
 533            }
 534        }
 535#endif
 536        existing = atomic_cmpxchg(lp, NULL, pd);
 537        if (unlikely(existing)) {
 538            g_free(pd);
 539            pd = existing;
 540        }
 541    }
 542
 543    return pd + (index & (V_L2_SIZE - 1));
 544}
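
/*
 * Note: the radix tree is populated lock-free.  Racing allocators publish a
 * new level with atomic_cmpxchg(); the loser frees its freshly allocated
 * table and uses the winner's, so readers only ever observe NULL or a fully
 * initialised level.
 */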
 545
 546static inline PageDesc *page_find(tb_page_addr_t index)
 547{
 548    return page_find_alloc(index, 0);
 549}
 550
 551static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 552                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
 553
 554/* In user-mode page locks aren't used; mmap_lock is enough */
 555#ifdef CONFIG_USER_ONLY
 556
 557#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
 558
 559static inline void page_lock(PageDesc *pd)
 560{ }
 561
 562static inline void page_unlock(PageDesc *pd)
 563{ }
 564
 565static inline void page_lock_tb(const TranslationBlock *tb)
 566{ }
 567
 568static inline void page_unlock_tb(const TranslationBlock *tb)
 569{ }
 570
 571struct page_collection *
 572page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 573{
 574    return NULL;
 575}
 576
 577void page_collection_unlock(struct page_collection *set)
 578{ }
 579#else /* !CONFIG_USER_ONLY */
 580
 581#ifdef CONFIG_DEBUG_TCG
 582
 583static __thread GHashTable *ht_pages_locked_debug;
 584
 585static void ht_pages_locked_debug_init(void)
 586{
 587    if (ht_pages_locked_debug) {
 588        return;
 589    }
 590    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
 591}
 592
 593static bool page_is_locked(const PageDesc *pd)
 594{
 595    PageDesc *found;
 596
 597    ht_pages_locked_debug_init();
 598    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
 599    return !!found;
 600}
 601
 602static void page_lock__debug(PageDesc *pd)
 603{
 604    ht_pages_locked_debug_init();
 605    g_assert(!page_is_locked(pd));
 606    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
 607}
 608
 609static void page_unlock__debug(const PageDesc *pd)
 610{
 611    bool removed;
 612
 613    ht_pages_locked_debug_init();
 614    g_assert(page_is_locked(pd));
 615    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
 616    g_assert(removed);
 617}
 618
 619static void
 620do_assert_page_locked(const PageDesc *pd, const char *file, int line)
 621{
 622    if (unlikely(!page_is_locked(pd))) {
 623        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
 624                     pd, file, line);
 625        abort();
 626    }
 627}
 628
 629#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
 630
 631void assert_no_pages_locked(void)
 632{
 633    ht_pages_locked_debug_init();
 634    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
 635}
 636
 637#else /* !CONFIG_DEBUG_TCG */
 638
 639#define assert_page_locked(pd)
 640
 641static inline void page_lock__debug(const PageDesc *pd)
 642{
 643}
 644
 645static inline void page_unlock__debug(const PageDesc *pd)
 646{
 647}
 648
 649#endif /* CONFIG_DEBUG_TCG */
 650
 651static inline void page_lock(PageDesc *pd)
 652{
 653    page_lock__debug(pd);
 654    qemu_spin_lock(&pd->lock);
 655}
 656
 657static inline void page_unlock(PageDesc *pd)
 658{
 659    qemu_spin_unlock(&pd->lock);
 660    page_unlock__debug(pd);
 661}
 662
 663/* lock the page(s) of a TB in the correct acquisition order */
 664static inline void page_lock_tb(const TranslationBlock *tb)
 665{
 666    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
 667}
 668
 669static inline void page_unlock_tb(const TranslationBlock *tb)
 670{
 671    PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 672
 673    page_unlock(p1);
 674    if (unlikely(tb->page_addr[1] != -1)) {
 675        PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 676
 677        if (p2 != p1) {
 678            page_unlock(p2);
 679        }
 680    }
 681}
 682
 683static inline struct page_entry *
 684page_entry_new(PageDesc *pd, tb_page_addr_t index)
 685{
 686    struct page_entry *pe = g_malloc(sizeof(*pe));
 687
 688    pe->index = index;
 689    pe->pd = pd;
 690    pe->locked = false;
 691    return pe;
 692}
 693
 694static void page_entry_destroy(gpointer p)
 695{
 696    struct page_entry *pe = p;
 697
 698    g_assert(pe->locked);
 699    page_unlock(pe->pd);
 700    g_free(pe);
 701}
 702
 703/* returns false on success */
 704static bool page_entry_trylock(struct page_entry *pe)
 705{
 706    bool busy;
 707
 708    busy = qemu_spin_trylock(&pe->pd->lock);
 709    if (!busy) {
 710        g_assert(!pe->locked);
 711        pe->locked = true;
 712        page_lock__debug(pe->pd);
 713    }
 714    return busy;
 715}
 716
 717static void do_page_entry_lock(struct page_entry *pe)
 718{
 719    page_lock(pe->pd);
 720    g_assert(!pe->locked);
 721    pe->locked = true;
 722}
 723
 724static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
 725{
 726    struct page_entry *pe = value;
 727
 728    do_page_entry_lock(pe);
 729    return FALSE;
 730}
 731
 732static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
 733{
 734    struct page_entry *pe = value;
 735
 736    if (pe->locked) {
 737        pe->locked = false;
 738        page_unlock(pe->pd);
 739    }
 740    return FALSE;
 741}
 742
 743/*
 744 * Trylock a page, and if successful, add the page to a collection.
 745 * Returns true ("busy") if the page could not be locked; false otherwise.
 746 */
 747static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
 748{
 749    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
 750    struct page_entry *pe;
 751    PageDesc *pd;
 752
 753    pe = g_tree_lookup(set->tree, &index);
 754    if (pe) {
 755        return false;
 756    }
 757
 758    pd = page_find(index);
 759    if (pd == NULL) {
 760        return false;
 761    }
 762
 763    pe = page_entry_new(pd, index);
 764    g_tree_insert(set->tree, &pe->index, pe);
 765
 766    /*
 767     * If this is either (1) the first insertion or (2) a page whose index
 768     * is higher than any other so far, just lock the page and move on.
 769     */
 770    if (set->max == NULL || pe->index > set->max->index) {
 771        set->max = pe;
 772        do_page_entry_lock(pe);
 773        return false;
 774    }
 775    /*
 776     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
 777     * locks in order.
 778     */
 779    return page_entry_trylock(pe);
 780}
 781
 782static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
 783{
 784    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
 785    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
 786
 787    if (a == b) {
 788        return 0;
 789    } else if (a < b) {
 790        return -1;
 791    }
 792    return 1;
 793}
 794
 795/*
 796 * Lock a range of pages ([@start,@end[) as well as the pages of all
 797 * intersecting TBs.
 798 * Locking order: acquire locks in ascending order of page index.
 799 */
 800struct page_collection *
 801page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 802{
 803    struct page_collection *set = g_malloc(sizeof(*set));
 804    tb_page_addr_t index;
 805    PageDesc *pd;
 806
 807    start >>= TARGET_PAGE_BITS;
 808    end   >>= TARGET_PAGE_BITS;
 809    g_assert(start <= end);
 810
 811    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
 812                                page_entry_destroy);
 813    set->max = NULL;
 814    assert_no_pages_locked();
 815
 816 retry:
 817    g_tree_foreach(set->tree, page_entry_lock, NULL);
 818
 819    for (index = start; index <= end; index++) {
 820        TranslationBlock *tb;
 821        int n;
 822
 823        pd = page_find(index);
 824        if (pd == NULL) {
 825            continue;
 826        }
 827        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
 828            g_tree_foreach(set->tree, page_entry_unlock, NULL);
 829            goto retry;
 830        }
 831        assert_page_locked(pd);
 832        PAGE_FOR_EACH_TB(pd, tb, n) {
 833            if (page_trylock_add(set, tb->page_addr[0]) ||
 834                (tb->page_addr[1] != -1 &&
 835                 page_trylock_add(set, tb->page_addr[1]))) {
 836                /* drop all locks, and reacquire in order */
 837                g_tree_foreach(set->tree, page_entry_unlock, NULL);
 838                goto retry;
 839            }
 840        }
 841    }
 842    return set;
 843}
 844
 845void page_collection_unlock(struct page_collection *set)
 846{
 847    /* entries are unlocked and freed via page_entry_destroy */
 848    g_tree_destroy(set->tree);
 849    g_free(set);
 850}
 851
 852#endif /* !CONFIG_USER_ONLY */
 853
 854static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 855                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
 856{
 857    PageDesc *p1, *p2;
 858    tb_page_addr_t page1;
 859    tb_page_addr_t page2;
 860
 861    assert_memory_lock();
 862    g_assert(phys1 != -1);
 863
 864    page1 = phys1 >> TARGET_PAGE_BITS;
 865    page2 = phys2 >> TARGET_PAGE_BITS;
 866
 867    p1 = page_find_alloc(page1, alloc);
 868    if (ret_p1) {
 869        *ret_p1 = p1;
 870    }
 871    if (likely(phys2 == -1)) {
 872        page_lock(p1);
 873        return;
 874    } else if (page1 == page2) {
 875        page_lock(p1);
 876        if (ret_p2) {
 877            *ret_p2 = p1;
 878        }
 879        return;
 880    }
 881    p2 = page_find_alloc(page2, alloc);
 882    if (ret_p2) {
 883        *ret_p2 = p2;
 884    }
 885    if (page1 < page2) {
 886        page_lock(p1);
 887        page_lock(p2);
 888    } else {
 889        page_lock(p2);
 890        page_lock(p1);
 891    }
 892}
 893
 894#if defined(CONFIG_USER_ONLY)
 895/* Currently it is not recommended to allocate big chunks of data in
 896   user mode. It will change when a dedicated libc is used.  */
 897/* ??? 64-bit hosts ought to have no problem mmaping data outside the
 898   region in which the guest needs to run.  Revisit this.  */
 899#define USE_STATIC_CODE_GEN_BUFFER
 900#endif
 901
 902/* Minimum size of the code gen buffer.  This number is arbitrarily
 903   chosen, but not so small that we can't have a fair number of TBs live.  */
 904#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
 905
 906/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
 907   indicated, this is constrained by the range of direct branches on the
 908   host cpu, as used by the TCG implementation of goto_tb.  */
 909#if defined(__x86_64__)
 910# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 911#elif defined(__sparc__)
 912# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 913#elif defined(__powerpc64__)
 914# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 915#elif defined(__powerpc__)
 916# define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
 917#elif defined(__aarch64__)
 918# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 919#elif defined(__s390x__)
 920  /* We have a +- 4GB range on the branches; leave some slop.  */
 921# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
 922#elif defined(__mips__)
 923  /* We have a 256MB branch region, but leave room to make sure the
 924     main executable is also within that region.  */
 925# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
 926#else
 927# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 928#endif
 929
 930#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
 931
 932#define DEFAULT_CODE_GEN_BUFFER_SIZE \
 933  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
 934   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 935
 936static inline size_t size_code_gen_buffer(size_t tb_size)
 937{
 938    /* Size the buffer.  */
 939    if (tb_size == 0) {
 940#ifdef USE_STATIC_CODE_GEN_BUFFER
 941        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 942#else
 943        /* ??? Needs adjustments.  */
 944        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
 945           static buffer, we could size this on RESERVED_VA, on the text
 946           segment size of the executable, or continue to use the default.  */
 947        tb_size = (unsigned long)(ram_size / 4);
 948#endif
 949    }
 950    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
 951        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
 952    }
 953    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
 954        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
 955    }
 956    return tb_size;
 957}
 958
 959#ifdef __mips__
 960/* In order to use J and JAL within the code_gen_buffer, we require
 961   that the buffer not cross a 256MB boundary.  */
 962static inline bool cross_256mb(void *addr, size_t size)
 963{
 964    return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
 965}
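
/*
 * For instance (illustrative arithmetic): an 8 MB buffer at 0x0ff00000
 * ends at 0x10700000, and 0x0ff00000 ^ 0x10700000 == 0x1f800000, which has
 * bits above the low 28 set, so cross_256mb() reports a crossing.
 */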
 966
 967/* We weren't able to allocate a buffer without crossing that boundary,
 968   so make do with the larger portion of the buffer that doesn't cross.
 969   Returns the new base of the buffer, and adjusts code_gen_buffer_size.  */
 970static inline void *split_cross_256mb(void *buf1, size_t size1)
 971{
 972    void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
 973    size_t size2 = buf1 + size1 - buf2;
 974
 975    size1 = buf2 - buf1;
 976    if (size1 < size2) {
 977        size1 = size2;
 978        buf1 = buf2;
 979    }
 980
 981    tcg_ctx->code_gen_buffer_size = size1;
 982    return buf1;
 983}
 984#endif
 985
 986#ifdef USE_STATIC_CODE_GEN_BUFFER
 987static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 988    __attribute__((aligned(CODE_GEN_ALIGN)));
 989
 990static inline void *alloc_code_gen_buffer(void)
 991{
 992    void *buf = static_code_gen_buffer;
 993    void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
 994    size_t size;
 995
 996    /* page-align the beginning and end of the buffer */
 997    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
 998    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
 999
1000    size = end - buf;
1001
1002    /* Honor a command-line option limiting the size of the buffer.  */
1003    if (size > tcg_ctx->code_gen_buffer_size) {
1004        size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size,
1005                               qemu_real_host_page_size);
1006    }
1007    tcg_ctx->code_gen_buffer_size = size;
1008
1009#ifdef __mips__
1010    if (cross_256mb(buf, size)) {
1011        buf = split_cross_256mb(buf, size);
1012        size = tcg_ctx->code_gen_buffer_size;
1013    }
1014#endif
1015
1016    if (qemu_mprotect_rwx(buf, size)) {
1017        abort();
1018    }
1019    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1020
1021    return buf;
1022}
1023#elif defined(_WIN32)
1024static inline void *alloc_code_gen_buffer(void)
1025{
1026    size_t size = tcg_ctx->code_gen_buffer_size;
1027    return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
1028                        PAGE_EXECUTE_READWRITE);
1029}
1030#else
1031static inline void *alloc_code_gen_buffer(void)
1032{
1033    int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
1034    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
1035    uintptr_t start = 0;
1036    size_t size = tcg_ctx->code_gen_buffer_size;
1037    void *buf;
1038
1039    /* Constrain the position of the buffer based on the host cpu.
1040       Note that these addresses are chosen in concert with the
1041       addresses assigned in the relevant linker script file.  */
1042# if defined(__PIE__) || defined(__PIC__)
1043    /* Don't bother setting a preferred location if we're building
1044       a position-independent executable.  We're more likely to get
1045       an address near the main executable if we let the kernel
1046       choose the address.  */
1047# elif defined(__x86_64__) && defined(MAP_32BIT)
1048    /* Force the memory down into low memory with the executable.
1049       Leave the choice of exact location with the kernel.  */
1050    flags |= MAP_32BIT;
1051    /* Cannot expect to map more than 800MB in low memory.  */
1052    if (size > 800u * 1024 * 1024) {
1053        tcg_ctx->code_gen_buffer_size = size = 800u * 1024 * 1024;
1054    }
1055# elif defined(__sparc__)
1056    start = 0x40000000ul;
1057# elif defined(__s390x__)
1058    start = 0x90000000ul;
1059# elif defined(__mips__)
1060#  if _MIPS_SIM == _ABI64
1061    start = 0x128000000ul;
1062#  else
1063    start = 0x08000000ul;
1064#  endif
1065# endif
1066
1067    buf = mmap((void *)start, size, prot, flags, -1, 0);
1068    if (buf == MAP_FAILED) {
1069        return NULL;
1070    }
1071
1072#ifdef __mips__
1073    if (cross_256mb(buf, size)) {
1074        /* Try again, with the original still mapped, to avoid re-acquiring
1075           that 256mb crossing.  This time don't specify an address.  */
1076        size_t size2;
1077        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
1078        switch ((int)(buf2 != MAP_FAILED)) {
1079        case 1:
1080            if (!cross_256mb(buf2, size)) {
1081                /* Success!  Use the new buffer.  */
1082                munmap(buf, size);
1083                break;
1084            }
1085            /* Failure.  Work with what we had.  */
1086            munmap(buf2, size);
1087            /* fallthru */
1088        default:
1089            /* Split the original buffer.  Free the smaller half.  */
1090            buf2 = split_cross_256mb(buf, size);
1091            size2 = tcg_ctx->code_gen_buffer_size;
1092            if (buf == buf2) {
1093                munmap(buf + size2, size - size2);
1094            } else {
1095                munmap(buf, size - size2);
1096            }
1097            size = size2;
1098            break;
1099        }
1100        buf = buf2;
1101    }
1102#endif
1103
1104    /* Request large pages for the buffer.  */
1105    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1106
1107    return buf;
1108}
1109#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
1110
1111static inline void code_gen_alloc(size_t tb_size)
1112{
1113    tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size);
1114    tcg_ctx->code_gen_buffer = alloc_code_gen_buffer();
1115    if (tcg_ctx->code_gen_buffer == NULL) {
1116        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
1117        exit(1);
1118    }
1119}
1120
1121static bool tb_cmp(const void *ap, const void *bp)
1122{
1123    const TranslationBlock *a = ap;
1124    const TranslationBlock *b = bp;
1125
1126    return a->pc == b->pc &&
1127        a->cs_base == b->cs_base &&
1128        a->flags == b->flags &&
1129        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
1130        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
1131        a->page_addr[0] == b->page_addr[0] &&
1132        a->page_addr[1] == b->page_addr[1];
1133}
1134
1135static void tb_htable_init(void)
1136{
1137    unsigned int mode = QHT_MODE_AUTO_RESIZE;
1138
1139    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
1140}
1141
1142/* Must be called before using the QEMU cpus. 'tb_size' is the size
1143   (in bytes) allocated to the translation buffer. Zero means default
1144   size. */
1145void tcg_exec_init(unsigned long tb_size)
1146{
1147    tcg_allowed = true;
1148    cpu_gen_init();
1149    page_init();
1150    tb_htable_init();
1151    code_gen_alloc(tb_size);
1152#if defined(CONFIG_SOFTMMU)
1153    /* There's no guest base to take into account, so go ahead and
1154       initialize the prologue now.  */
1155    tcg_prologue_init(tcg_ctx);
1156#endif
1157}
1158
1159/* call with @p->lock held */
1160static inline void invalidate_page_bitmap(PageDesc *p)
1161{
1162    assert_page_locked(p);
1163#ifdef CONFIG_SOFTMMU
1164    g_free(p->code_bitmap);
1165    p->code_bitmap = NULL;
1166    p->code_write_count = 0;
1167#endif
1168}
1169
1170/* Set to NULL all the 'first_tb' fields in all PageDescs. */
1171static void page_flush_tb_1(int level, void **lp)
1172{
1173    int i;
1174
1175    if (*lp == NULL) {
1176        return;
1177    }
1178    if (level == 0) {
1179        PageDesc *pd = *lp;
1180
1181        for (i = 0; i < V_L2_SIZE; ++i) {
1182            page_lock(&pd[i]);
1183            pd[i].first_tb = (uintptr_t)NULL;
1184            invalidate_page_bitmap(pd + i);
1185            page_unlock(&pd[i]);
1186        }
1187    } else {
1188        void **pp = *lp;
1189
1190        for (i = 0; i < V_L2_SIZE; ++i) {
1191            page_flush_tb_1(level - 1, pp + i);
1192        }
1193    }
1194}
1195
1196static void page_flush_tb(void)
1197{
1198    int i, l1_sz = v_l1_size;
1199
1200    for (i = 0; i < l1_sz; i++) {
1201        page_flush_tb_1(v_l2_levels, l1_map + i);
1202    }
1203}
1204
1205static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
1206{
1207    const TranslationBlock *tb = value;
1208    size_t *size = data;
1209
1210    *size += tb->tc.size;
1211    return false;
1212}
1213
1214/* flush all the translation blocks */
1215static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
1216{
1217    bool did_flush = false;
1218
1219    mmap_lock();
1220    /* If it has already been done on request of another CPU,
1221     * just retry.
1222     */
1223    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
1224        goto done;
1225    }
1226    did_flush = true;
1227
1228    if (DEBUG_TB_FLUSH_GATE) {
1229        size_t nb_tbs = tcg_nb_tbs();
1230        size_t host_size = 0;
1231
1232        tcg_tb_foreach(tb_host_size_iter, &host_size);
1233        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
1234               tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
1235    }
1236
1237    CPU_FOREACH(cpu) {
1238        cpu_tb_jmp_cache_clear(cpu);
1239    }
1240
1241    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
1242    page_flush_tb();
1243
1244    tcg_region_reset_all();
1245    /* XXX: flush processor icache at this point if cache flush is
1246       expensive */
1247    atomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
1248
1249done:
1250    mmap_unlock();
1251    if (did_flush) {
1252        qemu_plugin_flush_cb();
1253    }
1254}
1255
1256void tb_flush(CPUState *cpu)
1257{
1258    if (tcg_enabled()) {
1259        unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count);
1260
1261        if (cpu_in_exclusive_context(cpu)) {
1262            do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
1263        } else {
1264            async_safe_run_on_cpu(cpu, do_tb_flush,
1265                                  RUN_ON_CPU_HOST_INT(tb_flush_count));
1266        }
1267    }
1268}
1269
1270/*
1271 * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1272 * so in order to prevent bit rot we compile them unconditionally in user-mode,
1273 * and let the optimizer get rid of them by wrapping their user-only callers
1274 * with if (DEBUG_TB_CHECK_GATE).
1275 */
1276#ifdef CONFIG_USER_ONLY
1277
1278static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1279{
1280    TranslationBlock *tb = p;
1281    target_ulong addr = *(target_ulong *)userp;
1282
1283    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1284        printf("ERROR invalidate: address=" TARGET_FMT_lx
1285               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1286    }
1287}
1288
1289/* verify that all the pages have correct rights for code
1290 *
1291 * Called with mmap_lock held.
1292 */
1293static void tb_invalidate_check(target_ulong address)
1294{
1295    address &= TARGET_PAGE_MASK;
1296    qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1297}
1298
1299static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1300{
1301    TranslationBlock *tb = p;
1302    int flags1, flags2;
1303
1304    flags1 = page_get_flags(tb->pc);
1305    flags2 = page_get_flags(tb->pc + tb->size - 1);
1306    if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1307        printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1308               (long)tb->pc, tb->size, flags1, flags2);
1309    }
1310}
1311
1312/* verify that all the pages have correct rights for code */
1313static void tb_page_check(void)
1314{
1315    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1316}
1317
1318#endif /* CONFIG_USER_ONLY */
1319
1320/*
1321 * user-mode: call with mmap_lock held
1322 * !user-mode: call with @pd->lock held
1323 */
1324static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1325{
1326    TranslationBlock *tb1;
1327    uintptr_t *pprev;
1328    unsigned int n1;
1329
1330    assert_page_locked(pd);
1331    pprev = &pd->first_tb;
1332    PAGE_FOR_EACH_TB(pd, tb1, n1) {
1333        if (tb1 == tb) {
1334            *pprev = tb1->page_next[n1];
1335            return;
1336        }
1337        pprev = &tb1->page_next[n1];
1338    }
1339    g_assert_not_reached();
1340}
1341
1342/* remove @orig from its @n_orig-th jump list */
1343static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1344{
1345    uintptr_t ptr, ptr_locked;
1346    TranslationBlock *dest;
1347    TranslationBlock *tb;
1348    uintptr_t *pprev;
1349    int n;
1350
1351    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1352    ptr = atomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1353    dest = (TranslationBlock *)(ptr & ~1);
1354    if (dest == NULL) {
1355        return;
1356    }
1357
1358    qemu_spin_lock(&dest->jmp_lock);
1359    /*
1360     * While acquiring the lock, the jump might have been removed if the
1361     * destination TB was invalidated; check again.
1362     */
1363    ptr_locked = atomic_read(&orig->jmp_dest[n_orig]);
1364    if (ptr_locked != ptr) {
1365        qemu_spin_unlock(&dest->jmp_lock);
1366        /*
1367         * The only possibility is that the jump was unlinked via
1368         * tb_jump_unlink(dest). Seeing another destination here would be
1369         * a bug, because we set the LSB above.
1370         */
1371        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1372        return;
1373    }
1374    /*
1375     * We first acquired the lock, and since the destination pointer matches,
1376     * we know for sure that @orig is in the jmp list.
1377     */
1378    pprev = &dest->jmp_list_head;
1379    TB_FOR_EACH_JMP(dest, tb, n) {
1380        if (tb == orig && n == n_orig) {
1381            *pprev = tb->jmp_list_next[n];
1382            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1383            qemu_spin_unlock(&dest->jmp_lock);
1384            return;
1385        }
1386        pprev = &tb->jmp_list_next[n];
1387    }
1388    g_assert_not_reached();
1389}
1390
1391/* reset the jump entry 'n' of a TB so that it is not chained to
1392   another TB */
1393static inline void tb_reset_jump(TranslationBlock *tb, int n)
1394{
1395    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1396    tb_set_jmp_target(tb, n, addr);
1397}
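
/*
 * Note: the reset target (tc.ptr + jmp_reset_offset[n]) is the code that
 * immediately follows the goto_tb branch inside this TB, so after the
 * reset the TB exits back to the main loop instead of chaining directly
 * into another TB.
 */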
1398
1399/* remove any jumps to the TB */
1400static inline void tb_jmp_unlink(TranslationBlock *dest)
1401{
1402    TranslationBlock *tb;
1403    int n;
1404
1405    qemu_spin_lock(&dest->jmp_lock);
1406
1407    TB_FOR_EACH_JMP(dest, tb, n) {
1408        tb_reset_jump(tb, n);
1409        atomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1410        /* No need to clear the list entry; setting the dest ptr is enough */
1411    }
1412    dest->jmp_list_head = (uintptr_t)NULL;
1413
1414    qemu_spin_unlock(&dest->jmp_lock);
1415}
1416
1417/*
1418 * In user-mode, call with mmap_lock held.
1419 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1420 * locks held.
1421 */
1422static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1423{
1424    CPUState *cpu;
1425    PageDesc *p;
1426    uint32_t h;
1427    tb_page_addr_t phys_pc;
1428
1429    assert_memory_lock();
1430
1431    /* make sure no further incoming jumps will be chained to this TB */
1432    qemu_spin_lock(&tb->jmp_lock);
1433    atomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1434    qemu_spin_unlock(&tb->jmp_lock);
1435
1436    /* remove the TB from the hash list */
1437    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1438    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
1439                     tb->trace_vcpu_dstate);
1440    if (!(tb->cflags & CF_NOCACHE) &&
1441        !qht_remove(&tb_ctx.htable, tb, h)) {
1442        return;
1443    }
1444
1445    /* remove the TB from the page list */
1446    if (rm_from_page_list) {
1447        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1448        tb_page_remove(p, tb);
1449        invalidate_page_bitmap(p);
1450        if (tb->page_addr[1] != -1) {
1451            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1452            tb_page_remove(p, tb);
1453            invalidate_page_bitmap(p);
1454        }
1455    }
1456
1457    /* remove the TB from each CPU's tb_jmp_cache */
1458    h = tb_jmp_cache_hash_func(tb->pc);
1459    CPU_FOREACH(cpu) {
1460        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1461            atomic_set(&cpu->tb_jmp_cache[h], NULL);
1462        }
1463    }
1464
1465    /* remove this TB from the jump lists of its two destinations */
1466    tb_remove_from_jmp_list(tb, 0);
1467    tb_remove_from_jmp_list(tb, 1);
1468
1469    /* suppress any remaining jumps to this TB */
1470    tb_jmp_unlink(tb);
1471
1472    atomic_set(&tcg_ctx->tb_phys_invalidate_count,
1473               tcg_ctx->tb_phys_invalidate_count + 1);
1474}
1475
1476static void tb_phys_invalidate__locked(TranslationBlock *tb)
1477{
1478    do_tb_phys_invalidate(tb, true);
1479}
1480
1481/* invalidate one TB
1482 *
1483 * Called with mmap_lock held in user-mode.
1484 */
1485void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1486{
1487    if (page_addr == -1 && tb->page_addr[0] != -1) {
1488        page_lock_tb(tb);
1489        do_tb_phys_invalidate(tb, true);
1490        page_unlock_tb(tb);
1491    } else {
1492        do_tb_phys_invalidate(tb, false);
1493    }
1494}
1495
1496#ifdef CONFIG_SOFTMMU
1497/* call with @p->lock held */
1498static void build_page_bitmap(PageDesc *p)
1499{
1500    int n, tb_start, tb_end;
1501    TranslationBlock *tb;
1502
1503    assert_page_locked(p);
1504    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1505
1506    PAGE_FOR_EACH_TB(p, tb, n) {
1507        /* NOTE: this is subtle as a TB may span two physical pages */
1508        if (n == 0) {
1509            /* NOTE: tb_end may be after the end of the page, but
1510               it is not a problem */
1511            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1512            tb_end = tb_start + tb->size;
1513            if (tb_end > TARGET_PAGE_SIZE) {
1514                tb_end = TARGET_PAGE_SIZE;
1515            }
1516        } else {
1517            tb_start = 0;
1518            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1519        }
1520        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1521    }
1522}
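
/*
 * Illustrative example (assuming 4 KiB target pages): a 0x20-byte TB that
 * starts at page offset 0xff0 and spills into the next page sets bits
 * [0xff0, 0xfff] of the first page's code_bitmap (tb_end is clamped to
 * TARGET_PAGE_SIZE) and bits [0x0, 0xf] of the second page's bitmap.
 */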
1523#endif
1524
1525/* add the TB to the target page and protect it if necessary
1526 *
1527 * Called with mmap_lock held for user-mode emulation.
1528 * Called with @p->lock held in !user-mode.
1529 */
1530static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1531                               unsigned int n, tb_page_addr_t page_addr)
1532{
1533#ifndef CONFIG_USER_ONLY
1534    bool page_already_protected;
1535#endif
1536
1537    assert_page_locked(p);
1538
1539    tb->page_addr[n] = page_addr;
1540    tb->page_next[n] = p->first_tb;
1541#ifndef CONFIG_USER_ONLY
1542    page_already_protected = p->first_tb != (uintptr_t)NULL;
1543#endif
1544    p->first_tb = (uintptr_t)tb | n;
1545    invalidate_page_bitmap(p);
1546
1547#if defined(CONFIG_USER_ONLY)
1548    if (p->flags & PAGE_WRITE) {
1549        target_ulong addr;
1550        PageDesc *p2;
1551        int prot;
1552
1553        /* force the host page as non-writable (writes will have a
1554           page fault + mprotect overhead) */
1555        page_addr &= qemu_host_page_mask;
1556        prot = 0;
1557        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1558            addr += TARGET_PAGE_SIZE) {
1559
1560            p2 = page_find(addr >> TARGET_PAGE_BITS);
1561            if (!p2) {
1562                continue;
1563            }
1564            prot |= p2->flags;
1565            p2->flags &= ~PAGE_WRITE;
1566        }
1567        mprotect(g2h(page_addr), qemu_host_page_size,
1568                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1569        if (DEBUG_TB_INVALIDATE_GATE) {
1570            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
1571        }
1572    }
1573#else
1574    /* if some code is already present, then the page is already
1575       write-protected, so we only need to protect it when the first
1576       TB is allocated in a physical page */
1577    if (!page_already_protected) {
1578        tlb_protect_code(page_addr);
1579    }
1580#endif
1581}
1582
1583/* add a new TB and link it to the physical page tables. phys_page2 is
1584 * (-1) to indicate that only one page contains the TB.
1585 *
1586 * Called with mmap_lock held for user-mode emulation.
1587 *
1588 * Returns @tb, or a pointer to an existing TB that matches @tb.
1589 * Note that in !user-mode, another thread might have already added a TB
1590 * for the same block of guest code that @tb corresponds to. In that case,
1591 * the caller should discard the original @tb, and use instead the returned TB.
1592 */
1593static TranslationBlock *
1594tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1595             tb_page_addr_t phys_page2)
1596{
1597    PageDesc *p;
1598    PageDesc *p2 = NULL;
1599
1600    assert_memory_lock();
1601
1602    if (phys_pc == -1) {
1603        /*
1604         * If the TB is not associated with a physical RAM page then
1605         * it must be a temporary one-insn TB, and we have nothing to do
1606         * except fill in the page_addr[] fields.
1607         */
1608        assert(tb->cflags & CF_NOCACHE);
1609        tb->page_addr[0] = tb->page_addr[1] = -1;
1610        return tb;
1611    }
1612
1613    /*
1614     * Add the TB to the page list, acquiring the pages' locks first.
1615     * We keep the locks held until after inserting the TB in the hash table,
1616     * so that if the insertion fails we know for sure that the TBs are still
1617     * in the page descriptors.
1618     * Note that inserting into the hash table first isn't an option, since
1619     * we can only insert TBs that are fully initialized.
1620     */
1621    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1622    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1623    if (p2) {
1624        tb_page_add(p2, tb, 1, phys_page2);
1625    } else {
1626        tb->page_addr[1] = -1;
1627    }
1628
1629    if (!(tb->cflags & CF_NOCACHE)) {
1630        void *existing_tb = NULL;
1631        uint32_t h;
1632
1633        /* add in the hash table */
1634        h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
1635                         tb->trace_vcpu_dstate);
1636        qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1637
1638        /* remove TB from the page(s) if we couldn't insert it */
1639        if (unlikely(existing_tb)) {
1640            tb_page_remove(p, tb);
1641            invalidate_page_bitmap(p);
1642            if (p2) {
1643                tb_page_remove(p2, tb);
1644                invalidate_page_bitmap(p2);
1645            }
1646            tb = existing_tb;
1647        }
1648    }
1649
1650    if (p2 && p2 != p) {
1651        page_unlock(p2);
1652    }
1653    page_unlock(p);
1654
1655#ifdef CONFIG_USER_ONLY
1656    if (DEBUG_TB_CHECK_GATE) {
1657        tb_page_check();
1658    }
1659#endif
1660    return tb;
1661}
1662
1663/* Called with mmap_lock held for user mode emulation.  */
1664TranslationBlock *tb_gen_code(CPUState *cpu,
1665                              target_ulong pc, target_ulong cs_base,
1666                              uint32_t flags, int cflags)
1667{
1668    CPUArchState *env = cpu->env_ptr;
1669    TranslationBlock *tb, *existing_tb;
1670    tb_page_addr_t phys_pc, phys_page2;
1671    target_ulong virt_page2;
1672    tcg_insn_unit *gen_code_buf;
1673    int gen_code_size, search_size, max_insns;
1674#ifdef CONFIG_PROFILER
1675    TCGProfile *prof = &tcg_ctx->prof;
1676    int64_t ti;
1677#endif
1678
1679    assert_memory_lock();
1680
1681    phys_pc = get_page_addr_code(env, pc);
1682
1683    if (phys_pc == -1) {
1684        /* Generate a temporary TB with 1 insn in it */
1685        cflags &= ~CF_COUNT_MASK;
1686        cflags |= CF_NOCACHE | 1;
1687    }
1688
1689    cflags &= ~CF_CLUSTER_MASK;
1690    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
1691
1692    max_insns = cflags & CF_COUNT_MASK;
1693    if (max_insns == 0) {
1694        max_insns = CF_COUNT_MASK;
1695    }
1696    if (max_insns > TCG_MAX_INSNS) {
1697        max_insns = TCG_MAX_INSNS;
1698    }
1699    if (cpu->singlestep_enabled || singlestep) {
1700        max_insns = 1;
1701    }
1702
1703 buffer_overflow:
1704    tb = tcg_tb_alloc(tcg_ctx);
1705    if (unlikely(!tb)) {
1706        /* flush must be done */
1707        tb_flush(cpu);
1708        mmap_unlock();
1709        /* Make the execution loop process the flush as soon as possible.  */
1710        cpu->exception_index = EXCP_INTERRUPT;
1711        cpu_loop_exit(cpu);
1712    }
1713
1714    gen_code_buf = tcg_ctx->code_gen_ptr;
1715    tb->tc.ptr = gen_code_buf;
1716    tb->pc = pc;
1717    tb->cs_base = cs_base;
1718    tb->flags = flags;
1719    tb->cflags = cflags;
1720    tb->orig_tb = NULL;
1721    tb->trace_vcpu_dstate = *cpu->trace_dstate;
1722    tcg_ctx->tb_cflags = cflags;
1723 tb_overflow:
1724
1725#ifdef CONFIG_PROFILER
1726    /* includes aborted translations because of exceptions */
1727    atomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1728    ti = profile_getclock();
1729#endif
1730
1731    tcg_func_start(tcg_ctx);
1732
1733    tcg_ctx->cpu = env_cpu(env);
1734    gen_intermediate_code(cpu, tb, max_insns);
1735    tcg_ctx->cpu = NULL;
1736
1737    trace_translate_block(tb, tb->pc, tb->tc.ptr);
1738
1739    /* generate machine code */
1740    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1741    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1742    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1743    if (TCG_TARGET_HAS_direct_jump) {
1744        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1745        tcg_ctx->tb_jmp_target_addr = NULL;
1746    } else {
1747        tcg_ctx->tb_jmp_insn_offset = NULL;
1748        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1749    }
1750
1751#ifdef CONFIG_PROFILER
1752    atomic_set(&prof->tb_count, prof->tb_count + 1);
1753    atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti);
1754    ti = profile_getclock();
1755#endif
1756
1757    gen_code_size = tcg_gen_code(tcg_ctx, tb);
1758    if (unlikely(gen_code_size < 0)) {
1759        switch (gen_code_size) {
1760        case -1:
1761            /*
1762             * Overflow of code_gen_buffer, or the current slice of it.
1763             *
1764             * TODO: We don't need to re-do gen_intermediate_code, nor
1765             * should we re-do the tcg optimization currently hidden
1766             * inside tcg_gen_code.  All that should be required is to
1767             * flush the TBs, allocate a new TB, re-initialize it per
1768             * above, and re-do the actual code generation.
1769             */
1770            goto buffer_overflow;
1771
1772        case -2:
1773            /*
1774             * The code generated for the TranslationBlock is too large.
1775             * The maximum size allowed by the unwind info is 64k.
1776             * There may be stricter constraints from relocations
1777             * in the tcg backend.
1778             *
1779             * Try again with half as many insns as we attempted this time.
1780             * If a single insn overflows, there's a bug somewhere...
1781             */
1782            max_insns = tb->icount;
1783            assert(max_insns > 1);
1784            max_insns /= 2;
1785            goto tb_overflow;
1786
1787        default:
1788            g_assert_not_reached();
1789        }
1790    }
1791    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1792    if (unlikely(search_size < 0)) {
1793        goto buffer_overflow;
1794    }
1795    tb->tc.size = gen_code_size;
1796
1797#ifdef CONFIG_PROFILER
1798    atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1799    atomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1800    atomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1801    atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1802#endif
1803
1804#ifdef DEBUG_DISAS
1805    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1806        qemu_log_in_addr_range(tb->pc)) {
1807        qemu_log_lock();
1808        qemu_log("OUT: [size=%d]\n", gen_code_size);
1809        if (tcg_ctx->data_gen_ptr) {
1810            size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
1811            size_t data_size = gen_code_size - code_size;
1812            size_t i;
1813
1814            log_disas(tb->tc.ptr, code_size);
1815
1816            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1817                if (sizeof(tcg_target_ulong) == 8) {
1818                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1819                             (uintptr_t)tcg_ctx->data_gen_ptr + i,
1820                             *(uint64_t *)(tcg_ctx->data_gen_ptr + i));
1821                } else {
1822                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1823                             (uintptr_t)tcg_ctx->data_gen_ptr + i,
1824                             *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
1825                }
1826            }
1827        } else {
1828            log_disas(tb->tc.ptr, gen_code_size);
1829        }
1830        qemu_log("\n");
1831        qemu_log_flush();
1832        qemu_log_unlock();
1833    }
1834#endif
1835
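        /* Advance code_gen_ptr past the generated code and the search data,
           keeping the start of the next TB aligned to CODE_GEN_ALIGN.  */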
1836    atomic_set(&tcg_ctx->code_gen_ptr, (void *)
1837        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1838                 CODE_GEN_ALIGN));
1839
1840    /* init jump list */
1841    qemu_spin_init(&tb->jmp_lock);
1842    tb->jmp_list_head = (uintptr_t)NULL;
1843    tb->jmp_list_next[0] = (uintptr_t)NULL;
1844    tb->jmp_list_next[1] = (uintptr_t)NULL;
1845    tb->jmp_dest[0] = (uintptr_t)NULL;
1846    tb->jmp_dest[1] = (uintptr_t)NULL;
1847
1848    /* init original jump addresses which have been set during tcg_gen_code() */
1849    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1850        tb_reset_jump(tb, 0);
1851    }
1852    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1853        tb_reset_jump(tb, 1);
1854    }
1855
1856    /* if the TB spans two guest pages, record the second physical page */
1857    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1858    phys_page2 = -1;
1859    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1860        phys_page2 = get_page_addr_code(env, virt_page2);
1861    }
1862    /*
1863     * No explicit memory barrier is required -- tb_link_page() makes the
1864     * TB visible in a consistent state.
1865     */
1866    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
1867    /* if the TB already exists, discard what we just translated */
1868    if (unlikely(existing_tb != tb)) {
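            /* Reclaim the buffer space consumed by the discarded translation,
               including the TB structure allocated in front of the code.  */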
1869        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1870
1871        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1872        atomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1873        return existing_tb;
1874    }
1875    tcg_tb_insert(tb);
1876    return tb;
1877}
1878
1879/*
1880 * @p must be non-NULL.
1881 * user-mode: call with mmap_lock held.
1882 * !user-mode: call with all @pages locked.
1883 */
1884static void
1885tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1886                                      PageDesc *p, tb_page_addr_t start,
1887                                      tb_page_addr_t end,
1888                                      uintptr_t retaddr)
1889{
1890    TranslationBlock *tb;
1891    tb_page_addr_t tb_start, tb_end;
1892    int n;
1893#ifdef TARGET_HAS_PRECISE_SMC
1894    CPUState *cpu = current_cpu;
1895    CPUArchState *env = NULL;
1896    bool current_tb_not_found = retaddr != 0;
1897    bool current_tb_modified = false;
1898    TranslationBlock *current_tb = NULL;
1899    target_ulong current_pc = 0;
1900    target_ulong current_cs_base = 0;
1901    uint32_t current_flags = 0;
1902#endif /* TARGET_HAS_PRECISE_SMC */
1903
1904    assert_page_locked(p);
1905
1906#if defined(TARGET_HAS_PRECISE_SMC)
1907    if (cpu != NULL) {
1908        env = cpu->env_ptr;
1909    }
1910#endif
1911
1912    /* we remove all the TBs in the range [start, end[ */
1913    /* XXX: see if in some cases it could be faster to invalidate all
1914       the code */
1915    PAGE_FOR_EACH_TB(p, tb, n) {
1916        assert_page_locked(p);
1917        /* NOTE: this is subtle as a TB may span two physical pages */
1918        if (n == 0) {
1919            /* NOTE: tb_end may be after the end of the page, but
1920               it is not a problem */
1921            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1922            tb_end = tb_start + tb->size;
1923        } else {
1924            tb_start = tb->page_addr[1];
1925            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1926        }
1927        if (!(tb_end <= start || tb_start >= end)) {
1928#ifdef TARGET_HAS_PRECISE_SMC
1929            if (current_tb_not_found) {
1930                current_tb_not_found = false;
1931                /* now we have a real cpu fault */
1932                current_tb = tcg_tb_lookup(retaddr);
1933            }
1934            if (current_tb == tb &&
1935                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1936                /*
1937                 * If we are modifying the current TB, we must stop
1938                 * its execution. We could be more precise by checking
1939                 * that the modification is after the current PC, but it
1940                 * would require a specialized function to partially
1941                 * restore the CPU state.
1942                 */
1943                current_tb_modified = true;
1944                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
1945                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1946                                     &current_flags);
1947            }
1948#endif /* TARGET_HAS_PRECISE_SMC */
1949            tb_phys_invalidate__locked(tb);
1950        }
1951    }
1952#if !defined(CONFIG_USER_ONLY)
1953    /* if no code remains, there is no need to keep forcing slow writes */
1954    if (!p->first_tb) {
1955        invalidate_page_bitmap(p);
1956        tlb_unprotect_code(start);
1957    }
1958#endif
1959#ifdef TARGET_HAS_PRECISE_SMC
1960    if (current_tb_modified) {
1961        page_collection_unlock(pages);
1962        /* Force execution of one insn next time.  */
1963        cpu->cflags_next_tb = 1 | curr_cflags();
1964        mmap_unlock();
1965        cpu_loop_exit_noexc(cpu);
1966    }
1967#endif
1968}
1969
1970/*
1971 * Invalidate all TBs which intersect with the target physical address range
1972 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1976 *
1977 * Called with mmap_lock held for user-mode emulation
1978 */
1979void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end)
1980{
1981    struct page_collection *pages;
1982    PageDesc *p;
1983
1984    assert_memory_lock();
1985
1986    p = page_find(start >> TARGET_PAGE_BITS);
1987    if (p == NULL) {
1988        return;
1989    }
1990    pages = page_collection_lock(start, end);
1991    tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
1992    page_collection_unlock(pages);
1993}
1994
1995/*
1996 * Invalidate all TBs which intersect with the target physical address range
1997 * [start;end[. NOTE: start and end may refer to *different* physical pages.
2001 *
2002 * Called with mmap_lock held for user-mode emulation.
2003 */
2004#ifdef CONFIG_SOFTMMU
2005void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
2006#else
2007void tb_invalidate_phys_range(target_ulong start, target_ulong end)
2008#endif
2009{
2010    struct page_collection *pages;
2011    tb_page_addr_t next;
2012
2013    assert_memory_lock();
2014
2015    pages = page_collection_lock(start, end);
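        /* Walk the range page by page, clamping [start, bound[ to the
           portion that falls within each page.  */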
2016    for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
2017         start < end;
2018         start = next, next += TARGET_PAGE_SIZE) {
2019        PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
2020        tb_page_addr_t bound = MIN(next, end);
2021
2022        if (pd == NULL) {
2023            continue;
2024        }
2025        tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
2026    }
2027    page_collection_unlock(pages);
2028}
2029
2030#ifdef CONFIG_SOFTMMU
2031/* len must be <= 8 and start must be a multiple of len.
2032 * Called via softmmu_template.h when code areas are written to, with
2033 * the iothread mutex not held.
2034 *
2035 * Call with all @pages in the range [@start, @start + len[ locked.
2036 */
2037void tb_invalidate_phys_page_fast(struct page_collection *pages,
2038                                  tb_page_addr_t start, int len,
2039                                  uintptr_t retaddr)
2040{
2041    PageDesc *p;
2042
2043    assert_memory_lock();
2044
2045    p = page_find(start >> TARGET_PAGE_BITS);
2046    if (!p) {
2047        return;
2048    }
2049
2050    assert_page_locked(p);
2051    if (!p->code_bitmap &&
2052        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
2053        build_page_bitmap(p);
2054    }
2055    if (p->code_bitmap) {
2056        unsigned int nr;
2057        unsigned long b;
2058
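            /* Test the bitmap bits covering [start, start + len); if any are
               set, translated code overlaps the bytes being written.  */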
2059        nr = start & ~TARGET_PAGE_MASK;
2060        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
2061        if (b & ((1 << len) - 1)) {
2062            goto do_invalidate;
2063        }
2064    } else {
2065    do_invalidate:
2066        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
2067                                              retaddr);
2068    }
2069}
2070#else
2071/* Called with mmap_lock held. If pc is not 0 then it indicates the
2072 * host PC of the faulting store instruction that caused this invalidate.
2073 * Returns true if the caller needs to abort execution of the current
2074 * TB (because it was modified by this store and the guest CPU has
2075 * precise-SMC semantics).
2076 */
2077static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
2078{
2079    TranslationBlock *tb;
2080    PageDesc *p;
2081    int n;
2082#ifdef TARGET_HAS_PRECISE_SMC
2083    TranslationBlock *current_tb = NULL;
2084    CPUState *cpu = current_cpu;
2085    CPUArchState *env = NULL;
2086    int current_tb_modified = 0;
2087    target_ulong current_pc = 0;
2088    target_ulong current_cs_base = 0;
2089    uint32_t current_flags = 0;
2090#endif
2091
2092    assert_memory_lock();
2093
2094    addr &= TARGET_PAGE_MASK;
2095    p = page_find(addr >> TARGET_PAGE_BITS);
2096    if (!p) {
2097        return false;
2098    }
2099
2100#ifdef TARGET_HAS_PRECISE_SMC
2101    if (p->first_tb && pc != 0) {
2102        current_tb = tcg_tb_lookup(pc);
2103    }
2104    if (cpu != NULL) {
2105        env = cpu->env_ptr;
2106    }
2107#endif
2108    assert_page_locked(p);
2109    PAGE_FOR_EACH_TB(p, tb, n) {
2110#ifdef TARGET_HAS_PRECISE_SMC
2111        if (current_tb == tb &&
2112            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2113            /* If we are modifying the current TB, we must stop
2114               its execution. We could be more precise by checking
2115               that the modification is after the current PC, but it
2116               would require a specialized function to partially
2117               restore the CPU state.  */
2118
2119            current_tb_modified = 1;
2120            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
2121            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2122                                 &current_flags);
2123        }
2124#endif /* TARGET_HAS_PRECISE_SMC */
2125        tb_phys_invalidate(tb, addr);
2126    }
2127    p->first_tb = (uintptr_t)NULL;
2128#ifdef TARGET_HAS_PRECISE_SMC
2129    if (current_tb_modified) {
2130        /* Force execution of one insn next time.  */
2131        cpu->cflags_next_tb = 1 | curr_cflags();
2132        return true;
2133    }
2134#endif
2135
2136    return false;
2137}
2138#endif
2139
2140/* user-mode: call with mmap_lock held */
2141void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
2142{
2143    TranslationBlock *tb;
2144
2145    assert_memory_lock();
2146
2147    tb = tcg_tb_lookup(retaddr);
2148    if (tb) {
2149        /* We can use retranslation to find the PC.  */
2150        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2151        tb_phys_invalidate(tb, -1);
2152    } else {
2153        /* The exception probably happened in a helper.  The CPU state should
2154           have been saved before calling it. Fetch the PC from there.  */
2155        CPUArchState *env = cpu->env_ptr;
2156        target_ulong pc, cs_base;
2157        tb_page_addr_t addr;
2158        uint32_t flags;
2159
2160        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
2161        addr = get_page_addr_code(env, pc);
2162        if (addr != -1) {
2163            tb_invalidate_phys_range(addr, addr + 1);
2164        }
2165    }
2166}
2167
2168#ifndef CONFIG_USER_ONLY
2169/* In deterministic execution mode, instructions doing device I/O
2170 * must be at the end of the TB.
2171 *
2172 * Called by softmmu_template.h, with iothread mutex not held.
2173 */
2174void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
2175{
2176#if defined(TARGET_MIPS) || defined(TARGET_SH4)
2177    CPUArchState *env = cpu->env_ptr;
2178#endif
2179    TranslationBlock *tb;
2180    uint32_t n;
2181
2182    tb = tcg_tb_lookup(retaddr);
2183    if (!tb) {
2184        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
2185                  (void *)retaddr);
2186    }
2187    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2188
2189    /* On MIPS and SH, delay slot instructions can only be restarted if
2190       they were already the first instruction in the TB.  If this is not
2191       the first instruction in a TB then re-execute the preceding
2192       branch.  */
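        /* n is the number of guest insns the replacement TB will execute;
           it becomes 2 when we back up to re-execute the branch as well.  */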
2193    n = 1;
2194#if defined(TARGET_MIPS)
2195    if ((env->hflags & MIPS_HFLAG_BMASK) != 0
2196        && env->active_tc.PC != tb->pc) {
2197        env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
2198        cpu_neg(cpu)->icount_decr.u16.low++;
2199        env->hflags &= ~MIPS_HFLAG_BMASK;
2200        n = 2;
2201    }
2202#elif defined(TARGET_SH4)
2203    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
2204        && env->pc != tb->pc) {
2205        env->pc -= 2;
2206        cpu_neg(cpu)->icount_decr.u16.low++;
2207        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
2208        n = 2;
2209    }
2210#endif
2211
2212    /* Generate a new TB executing the I/O insn.  */
2213    cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
2214
2215    if (tb_cflags(tb) & CF_NOCACHE) {
2216        if (tb->orig_tb) {
2217            /* Invalidate original TB if this TB was generated in
2218             * cpu_exec_nocache() */
2219            tb_phys_invalidate(tb->orig_tb, -1);
2220        }
2221        tcg_tb_remove(tb);
2222    }
2223
2224    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
2225     * the first in the TB) then we end up generating a whole new TB and
2226     * repeating the fault, which is horribly inefficient.
2227     * Better would be to execute just this insn uncached, or generate a
2228     * second new TB.
2229     */
2230    cpu_loop_exit_noexc(cpu);
2231}
2232
2233static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
2234{
2235    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
2236
2237    for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
2238        atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
2239    }
2240}
2241
2242void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
2243{
2244    /* Discard jump cache entries for any tb which might overlap the
2245       flushed page.  A TB may span two pages, so the preceding
           page's entries are cleared as well.  */
2246    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
2247    tb_jmp_cache_clear_page(cpu, addr);
2248}
2249
2250static void print_qht_statistics(struct qht_stats hst)
2251{
2252    uint32_t hgram_opts;
2253    size_t hgram_bins;
2254    char *hgram;
2255
2256    if (!hst.head_buckets) {
2257        return;
2258    }
2259    qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2260                hst.used_head_buckets, hst.head_buckets,
2261                (double)hst.used_head_buckets / hst.head_buckets * 100);
2262
2263    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2264    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2265    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2266        hgram_opts |= QDIST_PR_NODECIMAL;
2267    }
2268    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2269    qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2270                qdist_avg(&hst.occupancy) * 100, hgram);
2271    g_free(hgram);
2272
2273    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2274    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2275    if (hgram_bins > 10) {
2276        hgram_bins = 10;
2277    } else {
2278        hgram_bins = 0;
2279        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2280    }
2281    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2282    qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2283                qdist_avg(&hst.chain), hgram);
2284    g_free(hgram);
2285}
2286
2287struct tb_tree_stats {
2288    size_t nb_tbs;
2289    size_t host_size;
2290    size_t target_size;
2291    size_t max_target_size;
2292    size_t direct_jmp_count;
2293    size_t direct_jmp2_count;
2294    size_t cross_page;
2295};
2296
2297static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2298{
2299    const TranslationBlock *tb = value;
2300    struct tb_tree_stats *tst = data;
2301
2302    tst->nb_tbs++;
2303    tst->host_size += tb->tc.size;
2304    tst->target_size += tb->size;
2305    if (tb->size > tst->max_target_size) {
2306        tst->max_target_size = tb->size;
2307    }
2308    if (tb->page_addr[1] != -1) {
2309        tst->cross_page++;
2310    }
2311    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2312        tst->direct_jmp_count++;
2313        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2314            tst->direct_jmp2_count++;
2315        }
2316    }
2317    return false;
2318}
2319
2320void dump_exec_info(void)
2321{
2322    struct tb_tree_stats tst = {};
2323    struct qht_stats hst;
2324    size_t nb_tbs, flush_full, flush_part, flush_elide;
2325
2326    tcg_tb_foreach(tb_tree_stats_iter, &tst);
2327    nb_tbs = tst.nb_tbs;
2328    /* XXX: avoid using doubles ? */
2329    qemu_printf("Translation buffer state:\n");
2330    /*
2331     * Report total code size including the padding and TB structs;
2332     * otherwise users might think "-tb-size" is not honoured.
2333     * For avg host size we use the precise numbers from tb_tree_stats though.
2334     */
2335    qemu_printf("gen code size       %zu/%zu\n",
2336                tcg_code_size(), tcg_code_capacity());
2337    qemu_printf("TB count            %zu\n", nb_tbs);
2338    qemu_printf("TB avg target size  %zu max=%zu bytes\n",
2339                nb_tbs ? tst.target_size / nb_tbs : 0,
2340                tst.max_target_size);
2341    qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2342                nb_tbs ? tst.host_size / nb_tbs : 0,
2343                tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2344    qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
2345                nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2346    qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2347                tst.direct_jmp_count,
2348                nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2349                tst.direct_jmp2_count,
2350                nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2351
2352    qht_statistics_init(&tb_ctx.htable, &hst);
2353    print_qht_statistics(hst);
2354    qht_statistics_destroy(&hst);
2355
2356    qemu_printf("\nStatistics:\n");
2357    qemu_printf("TB flush count      %u\n",
2358                atomic_read(&tb_ctx.tb_flush_count));
2359    qemu_printf("TB invalidate count %zu\n",
2360                tcg_tb_phys_invalidate_count());
2361
2362    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2363    qemu_printf("TLB full flushes    %zu\n", flush_full);
2364    qemu_printf("TLB partial flushes %zu\n", flush_part);
2365    qemu_printf("TLB elided flushes  %zu\n", flush_elide);
2366    tcg_dump_info();
2367}
2368
2369void dump_opcount_info(void)
2370{
2371    tcg_dump_op_count();
2372}
2373
2374#else /* CONFIG_USER_ONLY */
2375
2376void cpu_interrupt(CPUState *cpu, int mask)
2377{
2378    g_assert(qemu_mutex_iothread_locked());
2379    cpu->interrupt_request |= mask;
2380    atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2381}
2382
2383/*
2384 * Walks guest process memory "regions" one by one
2385 * and calls callback function 'fn' for each region.
2386 */
2387struct walk_memory_regions_data {
2388    walk_memory_regions_fn fn;
2389    void *priv;
2390    target_ulong start;
2391    int prot;
2392};
2393
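    /* Flush the accumulated region (if any) through the callback, then
     * start a new region at @end with protection @new_prot; a @new_prot
     * of 0 means no region is open.
     */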
2394static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2395                                   target_ulong end, int new_prot)
2396{
2397    if (data->start != -1u) {
2398        int rc = data->fn(data->priv, data->start, end, data->prot);
2399        if (rc != 0) {
2400            return rc;
2401        }
2402    }
2403
2404    data->start = (new_prot ? end : -1u);
2405    data->prot = new_prot;
2406
2407    return 0;
2408}
2409
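    /* Recursively walk one node of the l1_map radix tree: level 0 entries
     * point to PageDesc arrays, higher levels point to arrays of pointers
     * to the next level down.
     */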
2410static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2411                                 target_ulong base, int level, void **lp)
2412{
2413    target_ulong pa;
2414    int i, rc;
2415
2416    if (*lp == NULL) {
2417        return walk_memory_regions_end(data, base, 0);
2418    }
2419
2420    if (level == 0) {
2421        PageDesc *pd = *lp;
2422
2423        for (i = 0; i < V_L2_SIZE; ++i) {
2424            int prot = pd[i].flags;
2425
2426            pa = base | (i << TARGET_PAGE_BITS);
2427            if (prot != data->prot) {
2428                rc = walk_memory_regions_end(data, pa, prot);
2429                if (rc != 0) {
2430                    return rc;
2431                }
2432            }
2433        }
2434    } else {
2435        void **pp = *lp;
2436
2437        for (i = 0; i < V_L2_SIZE; ++i) {
2438            pa = base | ((target_ulong)i <<
2439                (TARGET_PAGE_BITS + V_L2_BITS * level));
2440            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2441            if (rc != 0) {
2442                return rc;
2443            }
2444        }
2445    }
2446
2447    return 0;
2448}
2449
2450int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2451{
2452    struct walk_memory_regions_data data;
2453    uintptr_t i, l1_sz = v_l1_size;
2454
2455    data.fn = fn;
2456    data.priv = priv;
2457    data.start = -1u;
2458    data.prot = 0;
2459
2460    for (i = 0; i < l1_sz; i++) {
2461        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2462        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2463        if (rc != 0) {
2464            return rc;
2465        }
2466    }
2467
2468    return walk_memory_regions_end(&data, 0, 0);
2469}
2470
2471static int dump_region(void *priv, target_ulong start,
2472    target_ulong end, unsigned long prot)
2473{
2474    FILE *f = (FILE *)priv;
2475
2476    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2477        " "TARGET_FMT_lx" %c%c%c\n",
2478        start, end, end - start,
2479        ((prot & PAGE_READ) ? 'r' : '-'),
2480        ((prot & PAGE_WRITE) ? 'w' : '-'),
2481        ((prot & PAGE_EXEC) ? 'x' : '-'));
2482
2483    return 0;
2484}
2485
2486/* dump memory mappings */
2487void page_dump(FILE *f)
2488{
2489    const int length = sizeof(target_ulong) * 2;
2490    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2491            length, "start", length, "end", length, "size", "prot");
2492    walk_memory_regions(f, dump_region);
2493}
2494
2495int page_get_flags(target_ulong address)
2496{
2497    PageDesc *p;
2498
2499    p = page_find(address >> TARGET_PAGE_BITS);
2500    if (!p) {
2501        return 0;
2502    }
2503    return p->flags;
2504}
2505
2506/* Modify the flags of a page and invalidate the code if necessary.
2507   The flag PAGE_WRITE_ORG is set automatically depending
2508   on PAGE_WRITE.  The mmap_lock should already be held.  */
2509void page_set_flags(target_ulong start, target_ulong end, int flags)
2510{
2511    target_ulong addr, len;
2512
2513    /* This function should never be called with addresses outside the
2514       guest address space.  If this assert fires, it probably indicates
2515       a missing call to h2g_valid.  */
2516#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2517    assert(end <= ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2518#endif
2519    assert(start < end);
2520    assert_memory_lock();
2521
2522    start = start & TARGET_PAGE_MASK;
2523    end = TARGET_PAGE_ALIGN(end);
2524
2525    if (flags & PAGE_WRITE) {
2526        flags |= PAGE_WRITE_ORG;
2527    }
2528
2529    for (addr = start, len = end - start;
2530         len != 0;
2531         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2532        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2533
2534        /* If the page was not writable and is being made writable,
2535           invalidate any translated code it contains.  */
2536        if (!(p->flags & PAGE_WRITE) &&
2537            (flags & PAGE_WRITE) &&
2538            p->first_tb) {
2539            tb_invalidate_phys_page(addr, 0);
2540        }
2541        p->flags = flags;
2542    }
2543}
2544
2545int page_check_range(target_ulong start, target_ulong len, int flags)
2546{
2547    PageDesc *p;
2548    target_ulong end;
2549    target_ulong addr;
2550
2551    /* This function should never be called with addresses outside the
2552       guest address space.  If this assert fires, it probably indicates
2553       a missing call to h2g_valid.  */
2554#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2555    assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2556#endif
2557
2558    if (len == 0) {
2559        return 0;
2560    }
2561    if (start + len - 1 < start) {
2562        /* We've wrapped around.  */
2563        return -1;
2564    }
2565
2566    /* this must be done before we lose bits in the next step */
2567    end = TARGET_PAGE_ALIGN(start + len);
2568    start = start & TARGET_PAGE_MASK;
2569
2570    for (addr = start, len = end - start;
2571         len != 0;
2572         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2573        p = page_find(addr >> TARGET_PAGE_BITS);
2574        if (!p) {
2575            return -1;
2576        }
2577        if (!(p->flags & PAGE_VALID)) {
2578            return -1;
2579        }
2580
2581        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2582            return -1;
2583        }
2584        if (flags & PAGE_WRITE) {
2585            if (!(p->flags & PAGE_WRITE_ORG)) {
2586                return -1;
2587            }
2588            /* unprotect the page if it was put read-only because it
2589               contains translated code */
2590            if (!(p->flags & PAGE_WRITE)) {
2591                if (!page_unprotect(addr, 0)) {
2592                    return -1;
2593                }
2594            }
2595        }
2596    }
2597    return 0;
2598}
2599
2600/* called from signal handler: invalidate the code and unprotect the
2601 * page. Return 0 if the fault was not handled, 1 if it was handled,
2602 * and 2 if it was handled but the caller must cause the TB to be
2603 * immediately exited. (We can only return 2 if the 'pc' argument is
2604 * non-zero.)
2605 */
2606int page_unprotect(target_ulong address, uintptr_t pc)
2607{
2608    unsigned int prot;
2609    bool current_tb_invalidated;
2610    PageDesc *p;
2611    target_ulong host_start, host_end, addr;
2612
2613    /* Technically this isn't safe inside a signal handler.  However we
2614       know this only ever happens in a synchronous SEGV handler, so in
2615       practice it seems to be ok.  */
2616    mmap_lock();
2617
2618    p = page_find(address >> TARGET_PAGE_BITS);
2619    if (!p) {
2620        mmap_unlock();
2621        return 0;
2622    }
2623
2624    /* if the page was really writable, then we change its
2625       protection back to writable */
2626    if (p->flags & PAGE_WRITE_ORG) {
2627        current_tb_invalidated = false;
2628        if (p->flags & PAGE_WRITE) {
2629            /* If the page is actually marked WRITE then assume this is because
2630             * this thread raced with another one which got here first and
2631             * set the page to PAGE_WRITE and did the TB invalidate for us.
2632             */
2633#ifdef TARGET_HAS_PRECISE_SMC
2634            TranslationBlock *current_tb = tcg_tb_lookup(pc);
2635            if (current_tb) {
2636                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2637            }
2638#endif
2639        } else {
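                /* mprotect() works at host-page granularity, so every target
                   page within the host page must be made writable and have
                   its translations invalidated.  */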
2640            host_start = address & qemu_host_page_mask;
2641            host_end = host_start + qemu_host_page_size;
2642
2643            prot = 0;
2644            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2645                p = page_find(addr >> TARGET_PAGE_BITS);
2646                p->flags |= PAGE_WRITE;
2647                prot |= p->flags;
2648
2649                /* and since the content will be modified, we must invalidate
2650                   the corresponding translated code. */
2651                current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2652#ifdef CONFIG_USER_ONLY
2653                if (DEBUG_TB_CHECK_GATE) {
2654                    tb_invalidate_check(addr);
2655                }
2656#endif
2657            }
2658            mprotect((void *)g2h(host_start), qemu_host_page_size,
2659                     prot & PAGE_BITS);
2660        }
2661        mmap_unlock();
2662        /* If the current TB was invalidated, return to the main loop */
2663        return current_tb_invalidated ? 2 : 1;
2664    }
2665    mmap_unlock();
2666    return 0;
2667}
2668#endif /* CONFIG_USER_ONLY */
2669
2670/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2671void tcg_flush_softmmu_tlb(CPUState *cs)
2672{
2673#ifdef CONFIG_SOFTMMU
2674    tlb_flush(cs);
2675#endif
2676}
2677