qemu/accel/tcg/translate-all.c
   1/*
   2 *  Host code generation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu-common.h"
  22
  23#define NO_CPU_IO_DEFS
  24#include "cpu.h"
  25#include "trace.h"
  26#include "disas/disas.h"
  27#include "exec/exec-all.h"
  28#include "tcg.h"
  29#if defined(CONFIG_USER_ONLY)
  30#include "qemu.h"
  31#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  32#include <sys/param.h>
  33#if __FreeBSD_version >= 700104
  34#define HAVE_KINFO_GETVMMAP
  35#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  36#include <sys/proc.h>
  37#include <machine/profile.h>
  38#define _KERNEL
  39#include <sys/user.h>
  40#undef _KERNEL
  41#undef sigqueue
  42#include <libutil.h>
  43#endif
  44#endif
  45#else
  46#include "exec/ram_addr.h"
  47#endif
  48
  49#include "exec/cputlb.h"
  50#include "exec/tb-hash.h"
  51#include "translate-all.h"
  52#include "qemu/bitmap.h"
  53#include "qemu/error-report.h"
  54#include "qemu/qemu-print.h"
  55#include "qemu/timer.h"
  56#include "qemu/main-loop.h"
  57#include "exec/log.h"
  58#include "sysemu/cpus.h"
  59#include "sysemu/tcg.h"
  60
  61/* #define DEBUG_TB_INVALIDATE */
  62/* #define DEBUG_TB_FLUSH */
  63/* make various TB consistency checks */
  64/* #define DEBUG_TB_CHECK */
  65
  66#ifdef DEBUG_TB_INVALIDATE
  67#define DEBUG_TB_INVALIDATE_GATE 1
  68#else
  69#define DEBUG_TB_INVALIDATE_GATE 0
  70#endif
  71
  72#ifdef DEBUG_TB_FLUSH
  73#define DEBUG_TB_FLUSH_GATE 1
  74#else
  75#define DEBUG_TB_FLUSH_GATE 0
  76#endif
  77
  78#if !defined(CONFIG_USER_ONLY)
  80/* TB consistency checks are only implemented for user-mode emulation.  */
  80#undef DEBUG_TB_CHECK
  81#endif
  82
  83#ifdef DEBUG_TB_CHECK
  84#define DEBUG_TB_CHECK_GATE 1
  85#else
  86#define DEBUG_TB_CHECK_GATE 0
  87#endif
  88
  89/* Accesses to the various translation structures need to be serialised via locks
  90 * for consistency.
  91 * In user-mode emulation, accesses to the memory-related structures are protected
  92 * with mmap_lock.
  93 * In !user-mode we use per-page locks.
  94 */
  95#ifdef CONFIG_SOFTMMU
  96#define assert_memory_lock()
  97#else
  98#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
  99#endif
 100
 101#define SMC_BITMAP_USE_THRESHOLD 10
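    /*
     * Once a page has accumulated this many write invalidations, a code bitmap
     * is built (see build_page_bitmap() below) recording which bytes of the
     * page are covered by TBs, so later writes that miss all translated code
     * can be filtered out cheaply.
     */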
 102
 103typedef struct PageDesc {
 104    /* list of TBs intersecting this ram page */
 105    uintptr_t first_tb;
 106#ifdef CONFIG_SOFTMMU
 107    /* in order to optimize self-modifying code, we count the number
 108       of write invalidations to a given page before switching to a bitmap */
 109    unsigned long *code_bitmap;
 110    unsigned int code_write_count;
 111#else
 112    unsigned long flags;
 113#endif
 114#ifndef CONFIG_USER_ONLY
 115    QemuSpin lock;
 116#endif
 117} PageDesc;
 118
 119/**
 120 * struct page_entry - page descriptor entry
 121 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 122 * @index:  page index of the page
 123 * @locked: whether the page is locked
 124 *
 125 * This struct helps us keep track of the locked state of a page, without
 126 * bloating &struct PageDesc.
 127 *
 128 * A page lock protects accesses to all fields of &struct PageDesc.
 129 *
 130 * See also: &struct page_collection.
 131 */
 132struct page_entry {
 133    PageDesc *pd;
 134    tb_page_addr_t index;
 135    bool locked;
 136};
 137
 138/**
 139 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 140 * @tree:   Binary search tree (BST) of the pages, with key == page index
 141 * @max:    Pointer to the page in @tree with the highest page index
 142 *
 143 * To avoid deadlock we lock pages in ascending order of page index.
 144 * When operating on a set of pages, we need to keep track of them so that
 145 * we can lock them in order and also unlock them later. For this we collect
 146 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 147 * @tree implementation we use does not provide an O(1) operation to obtain the
 148 * highest-ranked element, we use @max to keep track of the inserted page
 149 * with the highest index. This is valuable because if a page is not in
 150 * the tree and its index is higher than @max's, then we can lock it
 151 * without breaking the locking order rule.
 152 *
 153 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 154 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 155 *
 156 * See also: page_collection_lock().
 157 */
 158struct page_collection {
 159    GTree *tree;
 160    struct page_entry *max;
 161};
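    /*
     * Example of the @max shortcut: if the tree holds pages 3, 7 and 9
     * (@max == 9), page 12 can be locked immediately and becomes the new @max;
     * asking for page 5 instead forces a trylock and, on failure, all entries
     * are dropped and re-acquired in ascending order. See page_trylock_add()
     * and page_collection_lock() below.
     */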
 162
 163/* list iterators for lists of tagged pointers in TranslationBlock */
 164#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
 165    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
 166         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
 167             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
 168
 169#define PAGE_FOR_EACH_TB(pagedesc, tb, n)                       \
 170    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
 171
 172#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
 173    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
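    /*
     * The encoding walked by TB_FOR_EACH_TAGGED: each link stores a
     * TranslationBlock pointer with its low bit set to the index (0 or 1)
     * into the next TB's two-element @field array, i.e. which of the TB's
     * two pages (or two jump slots) the list threads through next.
     */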
 174
 175/* In system mode we want L1_MAP to be based on ram offsets,
 176   while in user mode we want it to be based on virtual addresses.  */
 177#if !defined(CONFIG_USER_ONLY)
 178#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 179# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 180#else
 181# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 182#endif
 183#else
 184# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
 185#endif
 186
 187/* Size of the L2 (and L3, etc) page tables.  */
 188#define V_L2_BITS 10
 189#define V_L2_SIZE (1 << V_L2_BITS)
 190
 191/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
 192QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
 193                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
 194                  * BITS_PER_BYTE);
 195
 196/*
 197 * L1 Mapping properties
 198 */
 199static int v_l1_size;
 200static int v_l1_shift;
 201static int v_l2_levels;
 202
 203/* The bottom level has pointers to PageDesc, and is indexed by
 204 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 205 */
 206#define V_L1_MIN_BITS 4
 207#define V_L1_MAX_BITS (V_L2_BITS + 3)
 208#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
 209
 210static void *l1_map[V_L1_MAX_SIZE];
 211
 212/* code generation context */
 213TCGContext tcg_init_ctx;
 214__thread TCGContext *tcg_ctx;
 215TBContext tb_ctx;
 216bool parallel_cpus;
 217
 218static void page_table_config_init(void)
 219{
 220    uint32_t v_l1_bits;
 221
 222    assert(TARGET_PAGE_BITS);
 223    /* The bits remaining after N lower levels of page tables.  */
 224    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
 225    if (v_l1_bits < V_L1_MIN_BITS) {
 226        v_l1_bits += V_L2_BITS;
 227    }
 228
 229    v_l1_size = 1 << v_l1_bits;
 230    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
 231    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
 232
 233    assert(v_l1_bits <= V_L1_MAX_BITS);
 234    assert(v_l1_shift % V_L2_BITS == 0);
 235    assert(v_l2_levels >= 0);
 236}
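    /*
     * For instance, with L1_MAP_ADDR_SPACE_BITS == 64 and TARGET_PAGE_BITS == 12
     * there are 52 index bits; 52 % 10 == 2 is below V_L1_MIN_BITS, so
     * v_l1_bits == 12, v_l1_size == 4096, v_l1_shift == 40 and v_l2_levels == 3:
     * one 4096-entry L1 table, three 1024-entry pointer levels, and a final
     * 1024-entry level of PageDesc.
     */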
 237
 238void cpu_gen_init(void)
 239{
 240    tcg_context_init(&tcg_init_ctx);
 241}
 242
 243/* Encode VAL as a signed leb128 sequence at P.
 244   Return P incremented past the encoded value.  */
 245static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 246{
 247    int more, byte;
 248
 249    do {
 250        byte = val & 0x7f;
 251        val >>= 7;
 252        more = !((val == 0 && (byte & 0x40) == 0)
 253                 || (val == -1 && (byte & 0x40) != 0));
 254        if (more) {
 255            byte |= 0x80;
 256        }
 257        *p++ = byte;
 258    } while (more);
 259
 260    return p;
 261}
 262
 263/* Decode a signed leb128 sequence at *PP; increment *PP past the
 264   decoded value.  Return the decoded value.  */
 265static target_long decode_sleb128(uint8_t **pp)
 266{
 267    uint8_t *p = *pp;
 268    target_long val = 0;
 269    int byte, shift = 0;
 270
 271    do {
 272        byte = *p++;
 273        val |= (target_ulong)(byte & 0x7f) << shift;
 274        shift += 7;
 275    } while (byte & 0x80);
 276    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
 277        val |= -(target_ulong)1 << shift;
 278    }
 279
 280    *pp = p;
 281    return val;
 282}
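    /*
     * Worked example of the sleb128 encoding above: a delta of -2 is the single
     * byte 0x7e (bit 6 set, so it sign-extends), whereas +64 needs two bytes,
     * 0xc0 0x00, because bit 6 of 0x40 would otherwise be read as a sign bit.
     */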
 283
 284/* Encode the data collected about the instructions while compiling TB.
 285   Place the data at BLOCK, and return the number of bytes consumed.
 286
 287   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
 288   which come from the target's insn_start data, followed by a uintptr_t
 289   which comes from the host pc of the end of the code implementing the insn.
 290
 291   Each line of the table is encoded as sleb128 deltas from the previous
 292   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
 293   That is, the first column is seeded with the guest pc, the last column
 294   with the host pc, and the middle columns with zeros.  */
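    /*
     * For example, with TARGET_INSN_START_WORDS == 2, the first row
     * { pc0, x0, end_off0 } is stored as the deltas { pc0 - tb->pc, x0, end_off0 }
     * and the second row { pc1, x1, end_off1 } as
     * { pc1 - pc0, x1 - x0, end_off1 - end_off0 }.
     */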
 295
 296static int encode_search(TranslationBlock *tb, uint8_t *block)
 297{
 298    uint8_t *highwater = tcg_ctx->code_gen_highwater;
 299    uint8_t *p = block;
 300    int i, j, n;
 301
 302    for (i = 0, n = tb->icount; i < n; ++i) {
 303        target_ulong prev;
 304
 305        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 306            if (i == 0) {
 307                prev = (j == 0 ? tb->pc : 0);
 308            } else {
 309                prev = tcg_ctx->gen_insn_data[i - 1][j];
 310            }
 311            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
 312        }
 313        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
 314        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
 315
 316        /* Test for (pending) buffer overflow.  The assumption is that any
 317           one row beginning below the high water mark cannot overrun
 318           the buffer completely.  Thus we can test for overflow after
 319           encoding a row without having to check during encoding.  */
 320        if (unlikely(p > highwater)) {
 321            return -1;
 322        }
 323    }
 324
 325    return p - block;
 326}
 327
 328/* The cpu state corresponding to 'searched_pc' is restored.
 329 * When reset_icount is true, the current TB will be interrupted and
 330 * icount should be recalculated.
 331 */
 332static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 333                                     uintptr_t searched_pc, bool reset_icount)
 334{
 335    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
 336    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
 337    CPUArchState *env = cpu->env_ptr;
 338    uint8_t *p = tb->tc.ptr + tb->tc.size;
 339    int i, j, num_insns = tb->icount;
 340#ifdef CONFIG_PROFILER
 341    TCGProfile *prof = &tcg_ctx->prof;
 342    int64_t ti = profile_getclock();
 343#endif
 344
 345    searched_pc -= GETPC_ADJ;
 346
 347    if (searched_pc < host_pc) {
 348        return -1;
 349    }
 350
 351    /* Reconstruct the stored insn data while looking for the point at
 352       which the end of the insn exceeds the searched_pc.  */
 353    for (i = 0; i < num_insns; ++i) {
 354        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 355            data[j] += decode_sleb128(&p);
 356        }
 357        host_pc += decode_sleb128(&p);
 358        if (host_pc > searched_pc) {
 359            goto found;
 360        }
 361    }
 362    return -1;
 363
 364 found:
 365    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
 366        assert(use_icount);
 367        /* Reset the cycle counter to the start of the block
 368           and shift it to the number of actually executed instructions */
 369        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
 370    }
 371    restore_state_to_opc(env, tb, data);
 372
 373#ifdef CONFIG_PROFILER
 374    atomic_set(&prof->restore_time,
 375                prof->restore_time + profile_getclock() - ti);
 376    atomic_set(&prof->restore_count, prof->restore_count + 1);
 377#endif
 378    return 0;
 379}
 380
 381bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
 382{
 383    TranslationBlock *tb;
 384    bool r = false;
 385    uintptr_t check_offset;
 386
 387    /* The host_pc has to be in the region of current code buffer. If
 388     * it is not we will not be able to resolve it here. The two cases
 389     * where host_pc will not be correct are:
 390     *
 391     *  - fault during translation (instruction fetch)
 392     *  - fault from helper (not using GETPC() macro)
 393     *
 394     * Either way we need to return early as we can't resolve it here.
 395     *
 396     * We are using unsigned arithmetic so if host_pc <
 397     * tcg_init_ctx.code_gen_buffer check_offset will wrap to way
 398     * above the code_gen_buffer_size
 399     */
 400    check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer;
 401
 402    if (check_offset < tcg_init_ctx.code_gen_buffer_size) {
 403        tb = tcg_tb_lookup(host_pc);
 404        if (tb) {
 405            cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
 406            if (tb_cflags(tb) & CF_NOCACHE) {
 407                /* one-shot translation, invalidate it immediately */
 408                tb_phys_invalidate(tb, -1);
 409                tcg_tb_remove(tb);
 410            }
 411            r = true;
 412        }
 413    }
 414
 415    return r;
 416}
 417
 418static void page_init(void)
 419{
 420    page_size_init();
 421    page_table_config_init();
 422
 423#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 424    {
 425#ifdef HAVE_KINFO_GETVMMAP
 426        struct kinfo_vmentry *freep;
 427        int i, cnt;
 428
 429        freep = kinfo_getvmmap(getpid(), &cnt);
 430        if (freep) {
 431            mmap_lock();
 432            for (i = 0; i < cnt; i++) {
 433                unsigned long startaddr, endaddr;
 434
 435                startaddr = freep[i].kve_start;
 436                endaddr = freep[i].kve_end;
 437                if (h2g_valid(startaddr)) {
 438                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 439
 440                    if (h2g_valid(endaddr)) {
 441                        endaddr = h2g(endaddr);
 442                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 443                    } else {
 444#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 445                        endaddr = ~0ul;
 446                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 447#endif
 448                    }
 449                }
 450            }
 451            free(freep);
 452            mmap_unlock();
 453        }
 454#else
 455        FILE *f;
 456
 457        last_brk = (unsigned long)sbrk(0);
 458
 459        f = fopen("/compat/linux/proc/self/maps", "r");
 460        if (f) {
 461            mmap_lock();
 462
 463            do {
 464                unsigned long startaddr, endaddr;
 465                int n;
 466
 467                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 468
 469                if (n == 2 && h2g_valid(startaddr)) {
 470                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 471
 472                    if (h2g_valid(endaddr)) {
 473                        endaddr = h2g(endaddr);
 474                    } else {
 475                        endaddr = ~0ul;
 476                    }
 477                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 478                }
 479            } while (!feof(f));
 480
 481            fclose(f);
 482            mmap_unlock();
 483        }
 484#endif
 485    }
 486#endif
 487}
 488
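    /*
     * Walk (and, if @alloc is set, populate) the radix tree rooted at l1_map:
     * one v_l1_size-entry top level, v_l2_levels intermediate pointer levels,
     * and a final level of PageDesc arrays.  Lookups use atomic_rcu_read() and
     * new levels are installed with atomic_cmpxchg(), so readers need no lock.
     */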
 489static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 490{
 491    PageDesc *pd;
 492    void **lp;
 493    int i;
 494
 495    /* Level 1.  Always allocated.  */
 496    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
 497
 498    /* Level 2..N-1.  */
 499    for (i = v_l2_levels; i > 0; i--) {
 500        void **p = atomic_rcu_read(lp);
 501
 502        if (p == NULL) {
 503            void *existing;
 504
 505            if (!alloc) {
 506                return NULL;
 507            }
 508            p = g_new0(void *, V_L2_SIZE);
 509            existing = atomic_cmpxchg(lp, NULL, p);
 510            if (unlikely(existing)) {
 511                g_free(p);
 512                p = existing;
 513            }
 514        }
 515
 516        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
 517    }
 518
 519    pd = atomic_rcu_read(lp);
 520    if (pd == NULL) {
 521        void *existing;
 522
 523        if (!alloc) {
 524            return NULL;
 525        }
 526        pd = g_new0(PageDesc, V_L2_SIZE);
 527#ifndef CONFIG_USER_ONLY
 528        {
 529            int i;
 530
 531            for (i = 0; i < V_L2_SIZE; i++) {
 532                qemu_spin_init(&pd[i].lock);
 533            }
 534        }
 535#endif
 536        existing = atomic_cmpxchg(lp, NULL, pd);
 537        if (unlikely(existing)) {
 538            g_free(pd);
 539            pd = existing;
 540        }
 541    }
 542
 543    return pd + (index & (V_L2_SIZE - 1));
 544}
 545
 546static inline PageDesc *page_find(tb_page_addr_t index)
 547{
 548    return page_find_alloc(index, 0);
 549}
 550
 551static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 552                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
 553
 554/* In user-mode page locks aren't used; mmap_lock is enough */
 555#ifdef CONFIG_USER_ONLY
 556
 557#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
 558
 559static inline void page_lock(PageDesc *pd)
 560{ }
 561
 562static inline void page_unlock(PageDesc *pd)
 563{ }
 564
 565static inline void page_lock_tb(const TranslationBlock *tb)
 566{ }
 567
 568static inline void page_unlock_tb(const TranslationBlock *tb)
 569{ }
 570
 571struct page_collection *
 572page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 573{
 574    return NULL;
 575}
 576
 577void page_collection_unlock(struct page_collection *set)
 578{ }
 579#else /* !CONFIG_USER_ONLY */
 580
 581#ifdef CONFIG_DEBUG_TCG
 582
 583static __thread GHashTable *ht_pages_locked_debug;
 584
 585static void ht_pages_locked_debug_init(void)
 586{
 587    if (ht_pages_locked_debug) {
 588        return;
 589    }
 590    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
 591}
 592
 593static bool page_is_locked(const PageDesc *pd)
 594{
 595    PageDesc *found;
 596
 597    ht_pages_locked_debug_init();
 598    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
 599    return !!found;
 600}
 601
 602static void page_lock__debug(PageDesc *pd)
 603{
 604    ht_pages_locked_debug_init();
 605    g_assert(!page_is_locked(pd));
 606    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
 607}
 608
 609static void page_unlock__debug(const PageDesc *pd)
 610{
 611    bool removed;
 612
 613    ht_pages_locked_debug_init();
 614    g_assert(page_is_locked(pd));
 615    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
 616    g_assert(removed);
 617}
 618
 619static void
 620do_assert_page_locked(const PageDesc *pd, const char *file, int line)
 621{
 622    if (unlikely(!page_is_locked(pd))) {
 623        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
 624                     pd, file, line);
 625        abort();
 626    }
 627}
 628
 629#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
 630
 631void assert_no_pages_locked(void)
 632{
 633    ht_pages_locked_debug_init();
 634    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
 635}
 636
 637#else /* !CONFIG_DEBUG_TCG */
 638
 639#define assert_page_locked(pd)
 640
 641static inline void page_lock__debug(const PageDesc *pd)
 642{
 643}
 644
 645static inline void page_unlock__debug(const PageDesc *pd)
 646{
 647}
 648
 649#endif /* CONFIG_DEBUG_TCG */
 650
 651static inline void page_lock(PageDesc *pd)
 652{
 653    page_lock__debug(pd);
 654    qemu_spin_lock(&pd->lock);
 655}
 656
 657static inline void page_unlock(PageDesc *pd)
 658{
 659    qemu_spin_unlock(&pd->lock);
 660    page_unlock__debug(pd);
 661}
 662
 663/* lock the page(s) of a TB in the correct acquisition order */
 664static inline void page_lock_tb(const TranslationBlock *tb)
 665{
 666    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
 667}
 668
 669static inline void page_unlock_tb(const TranslationBlock *tb)
 670{
 671    PageDesc *p1 = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 672
 673    page_unlock(p1);
 674    if (unlikely(tb->page_addr[1] != -1)) {
 675        PageDesc *p2 = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 676
 677        if (p2 != p1) {
 678            page_unlock(p2);
 679        }
 680    }
 681}
 682
 683static inline struct page_entry *
 684page_entry_new(PageDesc *pd, tb_page_addr_t index)
 685{
 686    struct page_entry *pe = g_malloc(sizeof(*pe));
 687
 688    pe->index = index;
 689    pe->pd = pd;
 690    pe->locked = false;
 691    return pe;
 692}
 693
 694static void page_entry_destroy(gpointer p)
 695{
 696    struct page_entry *pe = p;
 697
 698    g_assert(pe->locked);
 699    page_unlock(pe->pd);
 700    g_free(pe);
 701}
 702
 703/* returns false on success */
 704static bool page_entry_trylock(struct page_entry *pe)
 705{
 706    bool busy;
 707
 708    busy = qemu_spin_trylock(&pe->pd->lock);
 709    if (!busy) {
 710        g_assert(!pe->locked);
 711        pe->locked = true;
 712        page_lock__debug(pe->pd);
 713    }
 714    return busy;
 715}
 716
 717static void do_page_entry_lock(struct page_entry *pe)
 718{
 719    page_lock(pe->pd);
 720    g_assert(!pe->locked);
 721    pe->locked = true;
 722}
 723
 724static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
 725{
 726    struct page_entry *pe = value;
 727
 728    do_page_entry_lock(pe);
 729    return FALSE;
 730}
 731
 732static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
 733{
 734    struct page_entry *pe = value;
 735
 736    if (pe->locked) {
 737        pe->locked = false;
 738        page_unlock(pe->pd);
 739    }
 740    return FALSE;
 741}
 742
 743/*
 744 * Trylock a page, and if successful, add the page to a collection.
 745 * Returns true ("busy") if the page could not be locked; false otherwise.
 746 */
 747static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
 748{
 749    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
 750    struct page_entry *pe;
 751    PageDesc *pd;
 752
 753    pe = g_tree_lookup(set->tree, &index);
 754    if (pe) {
 755        return false;
 756    }
 757
 758    pd = page_find(index);
 759    if (pd == NULL) {
 760        return false;
 761    }
 762
 763    pe = page_entry_new(pd, index);
 764    g_tree_insert(set->tree, &pe->index, pe);
 765
 766    /*
 767     * If this is either (1) the first insertion or (2) a page whose index
 768     * is higher than any other so far, just lock the page and move on.
 769     */
 770    if (set->max == NULL || pe->index > set->max->index) {
 771        set->max = pe;
 772        do_page_entry_lock(pe);
 773        return false;
 774    }
 775    /*
 776     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
 777     * locks in order.
 778     */
 779    return page_entry_trylock(pe);
 780}
 781
 782static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
 783{
 784    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
 785    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
 786
 787    if (a == b) {
 788        return 0;
 789    } else if (a < b) {
 790        return -1;
 791    }
 792    return 1;
 793}
 794
 795/*
 796 * Lock a range of pages ([@start,@end[) as well as the pages of all
 797 * intersecting TBs.
 798 * Locking order: acquire locks in ascending order of page index.
 799 */
 800struct page_collection *
 801page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 802{
 803    struct page_collection *set = g_malloc(sizeof(*set));
 804    tb_page_addr_t index;
 805    PageDesc *pd;
 806
 807    start >>= TARGET_PAGE_BITS;
 808    end   >>= TARGET_PAGE_BITS;
 809    g_assert(start <= end);
 810
 811    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
 812                                page_entry_destroy);
 813    set->max = NULL;
 814    assert_no_pages_locked();
 815
 816 retry:
 817    g_tree_foreach(set->tree, page_entry_lock, NULL);
 818
 819    for (index = start; index <= end; index++) {
 820        TranslationBlock *tb;
 821        int n;
 822
 823        pd = page_find(index);
 824        if (pd == NULL) {
 825            continue;
 826        }
 827        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
 828            g_tree_foreach(set->tree, page_entry_unlock, NULL);
 829            goto retry;
 830        }
 831        assert_page_locked(pd);
 832        PAGE_FOR_EACH_TB(pd, tb, n) {
 833            if (page_trylock_add(set, tb->page_addr[0]) ||
 834                (tb->page_addr[1] != -1 &&
 835                 page_trylock_add(set, tb->page_addr[1]))) {
 836                /* drop all locks, and reacquire in order */
 837                g_tree_foreach(set->tree, page_entry_unlock, NULL);
 838                goto retry;
 839            }
 840        }
 841    }
 842    return set;
 843}
 844
 845void page_collection_unlock(struct page_collection *set)
 846{
 847    /* entries are unlocked and freed via page_entry_destroy */
 848    g_tree_destroy(set->tree);
 849    g_free(set);
 850}
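    /*
     * Typical usage, as a sketch (the real callers are the physical-range
     * invalidation paths later in this file):
     *
     *     struct page_collection *pages = page_collection_lock(start, end);
     *     ... walk and invalidate the TBs on the locked pages ...
     *     page_collection_unlock(pages);
     */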
 851
 852#endif /* !CONFIG_USER_ONLY */
 853
 854static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 855                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
 856{
 857    PageDesc *p1, *p2;
 858    tb_page_addr_t page1;
 859    tb_page_addr_t page2;
 860
 861    assert_memory_lock();
 862    g_assert(phys1 != -1);
 863
 864    page1 = phys1 >> TARGET_PAGE_BITS;
 865    page2 = phys2 >> TARGET_PAGE_BITS;
 866
 867    p1 = page_find_alloc(page1, alloc);
 868    if (ret_p1) {
 869        *ret_p1 = p1;
 870    }
 871    if (likely(phys2 == -1)) {
 872        page_lock(p1);
 873        return;
 874    } else if (page1 == page2) {
 875        page_lock(p1);
 876        if (ret_p2) {
 877            *ret_p2 = p1;
 878        }
 879        return;
 880    }
 881    p2 = page_find_alloc(page2, alloc);
 882    if (ret_p2) {
 883        *ret_p2 = p2;
 884    }
 885    if (page1 < page2) {
 886        page_lock(p1);
 887        page_lock(p2);
 888    } else {
 889        page_lock(p2);
 890        page_lock(p1);
 891    }
 892}
 893
 894#if defined(CONFIG_USER_ONLY)
 895/* Currently it is not recommended to allocate big chunks of data in
 896   user mode. It will change when a dedicated libc is used.  */
 897/* ??? 64-bit hosts ought to have no problem mmaping data outside the
 898   region in which the guest needs to run.  Revisit this.  */
 899#define USE_STATIC_CODE_GEN_BUFFER
 900#endif
 901
 902/* Minimum size of the code gen buffer.  This number is arbitrarily chosen,
 903   but not so small that we can't have a fair number of TBs live.  */
 904#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
 905
 906/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
 907   indicated, this is constrained by the range of direct branches on the
 908   host cpu, as used by the TCG implementation of goto_tb.  */
 909#if defined(__x86_64__)
 910# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 911#elif defined(__sparc__)
 912# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 913#elif defined(__powerpc64__)
 914# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 915#elif defined(__powerpc__)
 916# define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
 917#elif defined(__aarch64__)
 918# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 919#elif defined(__s390x__)
 920  /* We have a +- 4GB range on the branches; leave some slop.  */
 921# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
 922#elif defined(__mips__)
 923  /* We have a 256MB branch region, but leave room to make sure the
 924     main executable is also within that region.  */
 925# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
 926#else
 927# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 928#endif
 929
 930#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
 931
 932#define DEFAULT_CODE_GEN_BUFFER_SIZE \
 933  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
 934   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 935
 936static inline size_t size_code_gen_buffer(size_t tb_size)
 937{
 938    /* Size the buffer.  */
 939    if (tb_size == 0) {
 940#ifdef USE_STATIC_CODE_GEN_BUFFER
 941        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 942#else
 943        /* ??? Needs adjustments.  */
 944        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
 945           static buffer, we could size this on RESERVED_VA, on the text
 946           segment size of the executable, or continue to use the default.  */
 947        tb_size = (unsigned long)(ram_size / 4);
 948#endif
 949    }
 950    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
 951        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
 952    }
 953    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
 954        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
 955    }
 956    return tb_size;
 957}
 958
 959#ifdef __mips__
 960/* In order to use J and JAL within the code_gen_buffer, we require
 961   that the buffer not cross a 256MB boundary.  */
 962static inline bool cross_256mb(void *addr, size_t size)
 963{
 964    return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
 965}
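    /*
     * E.g. a 4 MiB buffer at 0x0fe00000 ends at 0x10200000; the two addresses
     * differ above bit 27, so cross_256mb() returns non-zero.  The same buffer
     * placed at 0x10000000 stays within one 256 MiB region and returns 0.
     */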
 966
 967/* We weren't able to allocate a buffer without crossing that boundary,
 968   so make do with the larger portion of the buffer that doesn't cross.
 969   Returns the new base of the buffer, and adjusts code_gen_buffer_size.  */
 970static inline void *split_cross_256mb(void *buf1, size_t size1)
 971{
 972    void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
 973    size_t size2 = buf1 + size1 - buf2;
 974
 975    size1 = buf2 - buf1;
 976    if (size1 < size2) {
 977        size1 = size2;
 978        buf1 = buf2;
 979    }
 980
 981    tcg_ctx->code_gen_buffer_size = size1;
 982    return buf1;
 983}
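    /*
     * Continuing the example above: splitting a 4 MiB buffer at 0x0ff00000
     * leaves a 1 MiB piece below 0x10000000 and a 3 MiB piece above it, so
     * split_cross_256mb() returns 0x10000000 and shrinks
     * code_gen_buffer_size to 3 MiB.
     */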
 984#endif
 985
 986#ifdef USE_STATIC_CODE_GEN_BUFFER
 987static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 988    __attribute__((aligned(CODE_GEN_ALIGN)));
 989
 990static inline void *alloc_code_gen_buffer(void)
 991{
 992    void *buf = static_code_gen_buffer;
 993    void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
 994    size_t size;
 995
 996    /* page-align the beginning and end of the buffer */
 997    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
 998    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
 999
1000    size = end - buf;
1001
1002    /* Honor a command-line option limiting the size of the buffer.  */
1003    if (size > tcg_ctx->code_gen_buffer_size) {
1004        size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size,
1005                               qemu_real_host_page_size);
1006    }
1007    tcg_ctx->code_gen_buffer_size = size;
1008
1009#ifdef __mips__
1010    if (cross_256mb(buf, size)) {
1011        buf = split_cross_256mb(buf, size);
1012        size = tcg_ctx->code_gen_buffer_size;
1013    }
1014#endif
1015
1016    if (qemu_mprotect_rwx(buf, size)) {
1017        abort();
1018    }
1019    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1020
1021    return buf;
1022}
1023#elif defined(_WIN32)
1024static inline void *alloc_code_gen_buffer(void)
1025{
1026    size_t size = tcg_ctx->code_gen_buffer_size;
1027    return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
1028                        PAGE_EXECUTE_READWRITE);
1029}
1030#else
1031static inline void *alloc_code_gen_buffer(void)
1032{
1033    int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
1034    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
1035    uintptr_t start = 0;
1036    size_t size = tcg_ctx->code_gen_buffer_size;
1037    void *buf;
1038
1039    /* Constrain the position of the buffer based on the host cpu.
1040       Note that these addresses are chosen in concert with the
1041       addresses assigned in the relevant linker script file.  */
1042# if defined(__PIE__) || defined(__PIC__)
1043    /* Don't bother setting a preferred location if we're building
1044       a position-independent executable.  We're more likely to get
1045       an address near the main executable if we let the kernel
1046       choose the address.  */
1047# elif defined(__x86_64__) && defined(MAP_32BIT)
1048    /* Force the memory down into low memory with the executable.
1049       Leave the choice of exact location with the kernel.  */
1050    flags |= MAP_32BIT;
1051    /* Cannot expect to map more than 800MB in low memory.  */
1052    if (size > 800u * 1024 * 1024) {
1053        tcg_ctx->code_gen_buffer_size = size = 800u * 1024 * 1024;
1054    }
1055# elif defined(__sparc__)
1056    start = 0x40000000ul;
1057# elif defined(__s390x__)
1058    start = 0x90000000ul;
1059# elif defined(__mips__)
1060#  if _MIPS_SIM == _ABI64
1061    start = 0x128000000ul;
1062#  else
1063    start = 0x08000000ul;
1064#  endif
1065# endif
1066
1067    buf = mmap((void *)start, size, prot, flags, -1, 0);
1068    if (buf == MAP_FAILED) {
1069        return NULL;
1070    }
1071
1072#ifdef __mips__
1073    if (cross_256mb(buf, size)) {
1074        /* Try again, with the original still mapped, to avoid re-acquiring
1075           that 256mb crossing.  This time don't specify an address.  */
1076        size_t size2;
1077        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
1078        switch ((int)(buf2 != MAP_FAILED)) {
1079        case 1:
1080            if (!cross_256mb(buf2, size)) {
1081                /* Success!  Use the new buffer.  */
1082                munmap(buf, size);
1083                break;
1084            }
1085            /* Failure.  Work with what we had.  */
1086            munmap(buf2, size);
1087            /* fallthru */
1088        default:
1089            /* Split the original buffer.  Free the smaller half.  */
1090            buf2 = split_cross_256mb(buf, size);
1091            size2 = tcg_ctx->code_gen_buffer_size;
1092            if (buf == buf2) {
1093                munmap(buf + size2, size - size2);
1094            } else {
1095                munmap(buf, size - size2);
1096            }
1097            size = size2;
1098            break;
1099        }
1100        buf = buf2;
1101    }
1102#endif
1103
1104    /* Request large pages for the buffer.  */
1105    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
1106
1107    return buf;
1108}
1109#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
1110
1111static inline void code_gen_alloc(size_t tb_size)
1112{
1113    tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size);
1114    tcg_ctx->code_gen_buffer = alloc_code_gen_buffer();
1115    if (tcg_ctx->code_gen_buffer == NULL) {
1116        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
1117        exit(1);
1118    }
1119}
1120
1121static bool tb_cmp(const void *ap, const void *bp)
1122{
1123    const TranslationBlock *a = ap;
1124    const TranslationBlock *b = bp;
1125
1126    return a->pc == b->pc &&
1127        a->cs_base == b->cs_base &&
1128        a->flags == b->flags &&
1129        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
1130        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
1131        a->page_addr[0] == b->page_addr[0] &&
1132        a->page_addr[1] == b->page_addr[1];
1133}
1134
1135static void tb_htable_init(void)
1136{
1137    unsigned int mode = QHT_MODE_AUTO_RESIZE;
1138
1139    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
1140}
1141
1142/* Must be called before using the QEMU cpus. 'tb_size' is the size
1143   (in bytes) allocated to the translation buffer. Zero means default
1144   size. */
1145void tcg_exec_init(unsigned long tb_size)
1146{
1147    tcg_allowed = true;
1148    cpu_gen_init();
1149    page_init();
1150    tb_htable_init();
1151    code_gen_alloc(tb_size);
1152#if defined(CONFIG_SOFTMMU)
1153    /* There's no guest base to take into account, so go ahead and
1154       initialize the prologue now.  */
1155    tcg_prologue_init(tcg_ctx);
1156#endif
1157}
1158
1159/*
1160 * Allocate a new translation block. Flush the translation buffer if
1161 * too many translation blocks or too much generated code.
1162 */
1163static TranslationBlock *tb_alloc(target_ulong pc)
1164{
1165    TranslationBlock *tb;
1166
1167    assert_memory_lock();
1168
1169    tb = tcg_tb_alloc(tcg_ctx);
1170    if (unlikely(tb == NULL)) {
1171        return NULL;
1172    }
1173    return tb;
1174}
1175
1176/* call with @p->lock held */
1177static inline void invalidate_page_bitmap(PageDesc *p)
1178{
1179    assert_page_locked(p);
1180#ifdef CONFIG_SOFTMMU
1181    g_free(p->code_bitmap);
1182    p->code_bitmap = NULL;
1183    p->code_write_count = 0;
1184#endif
1185}
1186
1187/* Set to NULL all the 'first_tb' fields in all PageDescs. */
1188static void page_flush_tb_1(int level, void **lp)
1189{
1190    int i;
1191
1192    if (*lp == NULL) {
1193        return;
1194    }
1195    if (level == 0) {
1196        PageDesc *pd = *lp;
1197
1198        for (i = 0; i < V_L2_SIZE; ++i) {
1199            page_lock(&pd[i]);
1200            pd[i].first_tb = (uintptr_t)NULL;
1201            invalidate_page_bitmap(pd + i);
1202            page_unlock(&pd[i]);
1203        }
1204    } else {
1205        void **pp = *lp;
1206
1207        for (i = 0; i < V_L2_SIZE; ++i) {
1208            page_flush_tb_1(level - 1, pp + i);
1209        }
1210    }
1211}
1212
1213static void page_flush_tb(void)
1214{
1215    int i, l1_sz = v_l1_size;
1216
1217    for (i = 0; i < l1_sz; i++) {
1218        page_flush_tb_1(v_l2_levels, l1_map + i);
1219    }
1220}
1221
1222static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
1223{
1224    const TranslationBlock *tb = value;
1225    size_t *size = data;
1226
1227    *size += tb->tc.size;
1228    return false;
1229}
1230
1231/* flush all the translation blocks */
1232static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
1233{
1234    mmap_lock();
1235    /* If it has already been done on request of another CPU,
1236     * just retry.
1237     */
1238    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
1239        goto done;
1240    }
1241
1242    if (DEBUG_TB_FLUSH_GATE) {
1243        size_t nb_tbs = tcg_nb_tbs();
1244        size_t host_size = 0;
1245
1246        tcg_tb_foreach(tb_host_size_iter, &host_size);
1247        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
1248               tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
1249    }
1250
1251    CPU_FOREACH(cpu) {
1252        cpu_tb_jmp_cache_clear(cpu);
1253    }
1254
1255    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
1256    page_flush_tb();
1257
1258    tcg_region_reset_all();
1259    /* XXX: flush processor icache at this point if cache flush is
1260       expensive */
1261    atomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
1262
1263done:
1264    mmap_unlock();
1265}
1266
1267void tb_flush(CPUState *cpu)
1268{
1269    if (tcg_enabled()) {
1270        unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count);
1271        async_safe_run_on_cpu(cpu, do_tb_flush,
1272                              RUN_ON_CPU_HOST_INT(tb_flush_count));
1273    }
1274}
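    /*
     * The tb_flush_count handshake makes concurrent flush requests coalesce:
     * each requester snapshots the current count, and do_tb_flush() only acts
     * if the count is still the one the requester saw, so a flush that has
     * already happened satisfies all pending requests.
     */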
1275
1276/*
1277 * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
1278 * so in order to prevent bit rot we compile them unconditionally in user-mode,
1279 * and let the optimizer get rid of them by wrapping their user-only callers
1280 * with if (DEBUG_TB_CHECK_GATE).
1281 */
1282#ifdef CONFIG_USER_ONLY
1283
1284static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
1285{
1286    TranslationBlock *tb = p;
1287    target_ulong addr = *(target_ulong *)userp;
1288
1289    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
1290        printf("ERROR invalidate: address=" TARGET_FMT_lx
1291               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
1292    }
1293}
1294
1295/* verify that all the pages have correct rights for code
1296 *
1297 * Called with mmap_lock held.
1298 */
1299static void tb_invalidate_check(target_ulong address)
1300{
1301    address &= TARGET_PAGE_MASK;
1302    qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address);
1303}
1304
1305static void do_tb_page_check(void *p, uint32_t hash, void *userp)
1306{
1307    TranslationBlock *tb = p;
1308    int flags1, flags2;
1309
1310    flags1 = page_get_flags(tb->pc);
1311    flags2 = page_get_flags(tb->pc + tb->size - 1);
1312    if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
1313        printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
1314               (long)tb->pc, tb->size, flags1, flags2);
1315    }
1316}
1317
1318/* verify that all the pages have correct rights for code */
1319static void tb_page_check(void)
1320{
1321    qht_iter(&tb_ctx.htable, do_tb_page_check, NULL);
1322}
1323
1324#endif /* CONFIG_USER_ONLY */
1325
1326/*
1327 * user-mode: call with mmap_lock held
1328 * !user-mode: call with @pd->lock held
1329 */
1330static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
1331{
1332    TranslationBlock *tb1;
1333    uintptr_t *pprev;
1334    unsigned int n1;
1335
1336    assert_page_locked(pd);
1337    pprev = &pd->first_tb;
1338    PAGE_FOR_EACH_TB(pd, tb1, n1) {
1339        if (tb1 == tb) {
1340            *pprev = tb1->page_next[n1];
1341            return;
1342        }
1343        pprev = &tb1->page_next[n1];
1344    }
1345    g_assert_not_reached();
1346}
1347
1348/* remove @orig from its @n_orig-th jump list */
1349static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
1350{
1351    uintptr_t ptr, ptr_locked;
1352    TranslationBlock *dest;
1353    TranslationBlock *tb;
1354    uintptr_t *pprev;
1355    int n;
1356
1357    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
1358    ptr = atomic_or_fetch(&orig->jmp_dest[n_orig], 1);
1359    dest = (TranslationBlock *)(ptr & ~1);
1360    if (dest == NULL) {
1361        return;
1362    }
1363
1364    qemu_spin_lock(&dest->jmp_lock);
1365    /*
1366     * While acquiring the lock, the jump might have been removed if the
1367     * destination TB was invalidated; check again.
1368     */
1369    ptr_locked = atomic_read(&orig->jmp_dest[n_orig]);
1370    if (ptr_locked != ptr) {
1371        qemu_spin_unlock(&dest->jmp_lock);
1372        /*
1373         * The only possibility is that the jump was unlinked via
1374         * tb_jmp_unlink(dest). Seeing another destination here would be a bug,
1375         * because we set the LSB above.
1376         */
1377        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
1378        return;
1379    }
1380    /*
1381     * We first acquired the lock, and since the destination pointer matches,
1382     * we know for sure that @orig is in the jmp list.
1383     */
1384    pprev = &dest->jmp_list_head;
1385    TB_FOR_EACH_JMP(dest, tb, n) {
1386        if (tb == orig && n == n_orig) {
1387            *pprev = tb->jmp_list_next[n];
1388            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
1389            qemu_spin_unlock(&dest->jmp_lock);
1390            return;
1391        }
1392        pprev = &tb->jmp_list_next[n];
1393    }
1394    g_assert_not_reached();
1395}
1396
1397/* reset the jump entry 'n' of a TB so that it is not chained to
1398   another TB */
1399static inline void tb_reset_jump(TranslationBlock *tb, int n)
1400{
1401    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
1402    tb_set_jmp_target(tb, n, addr);
1403}
1404
1405/* remove any jumps to the TB */
1406static inline void tb_jmp_unlink(TranslationBlock *dest)
1407{
1408    TranslationBlock *tb;
1409    int n;
1410
1411    qemu_spin_lock(&dest->jmp_lock);
1412
1413    TB_FOR_EACH_JMP(dest, tb, n) {
1414        tb_reset_jump(tb, n);
1415        atomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
1416        /* No need to clear the list entry; setting the dest ptr is enough */
1417    }
1418    dest->jmp_list_head = (uintptr_t)NULL;
1419
1420    qemu_spin_unlock(&dest->jmp_lock);
1421}
1422
1423/*
1424 * In user-mode, call with mmap_lock held.
1425 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
1426 * locks held.
1427 */
1428static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
1429{
1430    CPUState *cpu;
1431    PageDesc *p;
1432    uint32_t h;
1433    tb_page_addr_t phys_pc;
1434
1435    assert_memory_lock();
1436
1437    /* make sure no further incoming jumps will be chained to this TB */
1438    qemu_spin_lock(&tb->jmp_lock);
1439    atomic_set(&tb->cflags, tb->cflags | CF_INVALID);
1440    qemu_spin_unlock(&tb->jmp_lock);
1441
1442    /* remove the TB from the hash list */
1443    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1444    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
1445                     tb->trace_vcpu_dstate);
1446    if (!(tb->cflags & CF_NOCACHE) &&
1447        !qht_remove(&tb_ctx.htable, tb, h)) {
1448        return;
1449    }
1450
1451    /* remove the TB from the page list */
1452    if (rm_from_page_list) {
1453        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
1454        tb_page_remove(p, tb);
1455        invalidate_page_bitmap(p);
1456        if (tb->page_addr[1] != -1) {
1457            p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
1458            tb_page_remove(p, tb);
1459            invalidate_page_bitmap(p);
1460        }
1461    }
1462
1463    /* remove the TB from each CPU's tb_jmp_cache */
1464    h = tb_jmp_cache_hash_func(tb->pc);
1465    CPU_FOREACH(cpu) {
1466        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
1467            atomic_set(&cpu->tb_jmp_cache[h], NULL);
1468        }
1469    }
1470
1471    /* suppress this TB from the two jump lists */
1472    tb_remove_from_jmp_list(tb, 0);
1473    tb_remove_from_jmp_list(tb, 1);
1474
1475    /* suppress any remaining jumps to this TB */
1476    tb_jmp_unlink(tb);
1477
1478    atomic_set(&tcg_ctx->tb_phys_invalidate_count,
1479               tcg_ctx->tb_phys_invalidate_count + 1);
1480}
1481
1482static void tb_phys_invalidate__locked(TranslationBlock *tb)
1483{
1484    do_tb_phys_invalidate(tb, true);
1485}
1486
1487/* invalidate one TB
1488 *
1489 * Called with mmap_lock held in user-mode.
1490 */
1491void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
1492{
1493    if (page_addr == -1 && tb->page_addr[0] != -1) {
1494        page_lock_tb(tb);
1495        do_tb_phys_invalidate(tb, true);
1496        page_unlock_tb(tb);
1497    } else {
1498        do_tb_phys_invalidate(tb, false);
1499    }
1500}
1501
1502#ifdef CONFIG_SOFTMMU
1503/* call with @p->lock held */
1504static void build_page_bitmap(PageDesc *p)
1505{
1506    int n, tb_start, tb_end;
1507    TranslationBlock *tb;
1508
1509    assert_page_locked(p);
1510    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
1511
1512    PAGE_FOR_EACH_TB(p, tb, n) {
1513        /* NOTE: this is subtle as a TB may span two physical pages */
1514        if (n == 0) {
1515            /* NOTE: tb_end may be after the end of the page, but
1516               it is not a problem */
1517            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1518            tb_end = tb_start + tb->size;
1519            if (tb_end > TARGET_PAGE_SIZE) {
1520                tb_end = TARGET_PAGE_SIZE;
1521            }
1522        } else {
1523            tb_start = 0;
1524            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1525        }
1526        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
1527    }
1528}
1529#endif
1530
1531/* add the tb to the target page and protect it if necessary
1532 *
1533 * Called with mmap_lock held for user-mode emulation.
1534 * Called with @p->lock held in !user-mode.
1535 */
1536static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
1537                               unsigned int n, tb_page_addr_t page_addr)
1538{
1539#ifndef CONFIG_USER_ONLY
1540    bool page_already_protected;
1541#endif
1542
1543    assert_page_locked(p);
1544
1545    tb->page_addr[n] = page_addr;
1546    tb->page_next[n] = p->first_tb;
1547#ifndef CONFIG_USER_ONLY
1548    page_already_protected = p->first_tb != (uintptr_t)NULL;
1549#endif
1550    p->first_tb = (uintptr_t)tb | n;
1551    invalidate_page_bitmap(p);
1552
1553#if defined(CONFIG_USER_ONLY)
1554    if (p->flags & PAGE_WRITE) {
1555        target_ulong addr;
1556        PageDesc *p2;
1557        int prot;
1558
1559        /* force the host page as non-writable (writes will have a
1560           page fault + mprotect overhead) */
1561        page_addr &= qemu_host_page_mask;
1562        prot = 0;
1563        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1564            addr += TARGET_PAGE_SIZE) {
1565
1566            p2 = page_find(addr >> TARGET_PAGE_BITS);
1567            if (!p2) {
1568                continue;
1569            }
1570            prot |= p2->flags;
1571            p2->flags &= ~PAGE_WRITE;
1572        }
1573        mprotect(g2h(page_addr), qemu_host_page_size,
1574                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1575        if (DEBUG_TB_INVALIDATE_GATE) {
1576            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
1577        }
1578    }
1579#else
1580    /* if some code is already present, then the pages are already
1581       protected. So we handle the case where only the first TB is
1582       allocated in a physical page */
1583    if (!page_already_protected) {
1584        tlb_protect_code(page_addr);
1585    }
1586#endif
1587}
1588
1589/* add a new TB and link it to the physical page tables. phys_page2 is
1590 * (-1) to indicate that only one page contains the TB.
1591 *
1592 * Called with mmap_lock held for user-mode emulation.
1593 *
1594 * Returns @tb, or a pointer to an existing TB that matches @tb.
1595 * Note that in !user-mode, another thread might have already added a TB
1596 * for the same block of guest code that @tb corresponds to. In that case,
1597 * the caller should discard the original @tb, and use instead the returned TB.
1598 */
1599static TranslationBlock *
1600tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1601             tb_page_addr_t phys_page2)
1602{
1603    PageDesc *p;
1604    PageDesc *p2 = NULL;
1605
1606    assert_memory_lock();
1607
1608    if (phys_pc == -1) {
1609        /*
1610         * If the TB is not associated with a physical RAM page then
1611         * it must be a temporary one-insn TB, and we have nothing to do
1612         * except fill in the page_addr[] fields.
1613         */
1614        assert(tb->cflags & CF_NOCACHE);
1615        tb->page_addr[0] = tb->page_addr[1] = -1;
1616        return tb;
1617    }
1618
1619    /*
1620     * Add the TB to the page list, acquiring the pages' locks first.
1621     * We keep the locks held until after inserting the TB in the hash table,
1622     * so that if the insertion fails we know for sure that the TBs are still
1623     * in the page descriptors.
1624     * Note that inserting into the hash table first isn't an option, since
1625     * we can only insert TBs that are fully initialized.
1626     */
1627    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
1628    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
1629    if (p2) {
1630        tb_page_add(p2, tb, 1, phys_page2);
1631    } else {
1632        tb->page_addr[1] = -1;
1633    }
1634
1635    if (!(tb->cflags & CF_NOCACHE)) {
1636        void *existing_tb = NULL;
1637        uint32_t h;
1638
1639        /* add in the hash table */
1640        h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
1641                         tb->trace_vcpu_dstate);
1642        qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
1643
1644        /* remove TB from the page(s) if we couldn't insert it */
1645        if (unlikely(existing_tb)) {
1646            tb_page_remove(p, tb);
1647            invalidate_page_bitmap(p);
1648            if (p2) {
1649                tb_page_remove(p2, tb);
1650                invalidate_page_bitmap(p2);
1651            }
1652            tb = existing_tb;
1653        }
1654    }
1655
1656    if (p2 && p2 != p) {
1657        page_unlock(p2);
1658    }
1659    page_unlock(p);
1660
1661#ifdef CONFIG_USER_ONLY
1662    if (DEBUG_TB_CHECK_GATE) {
1663        tb_page_check();
1664    }
1665#endif
1666    return tb;
1667}
1668
1669/* Called with mmap_lock held for user mode emulation.  */
1670TranslationBlock *tb_gen_code(CPUState *cpu,
1671                              target_ulong pc, target_ulong cs_base,
1672                              uint32_t flags, int cflags)
1673{
1674    CPUArchState *env = cpu->env_ptr;
1675    TranslationBlock *tb, *existing_tb;
1676    tb_page_addr_t phys_pc, phys_page2;
1677    target_ulong virt_page2;
1678    tcg_insn_unit *gen_code_buf;
1679    int gen_code_size, search_size, max_insns;
1680#ifdef CONFIG_PROFILER
1681    TCGProfile *prof = &tcg_ctx->prof;
1682    int64_t ti;
1683#endif
1684    assert_memory_lock();
1685
1686    phys_pc = get_page_addr_code(env, pc);
1687
1688    if (phys_pc == -1) {
1689        /* Generate a temporary TB with 1 insn in it */
1690        cflags &= ~CF_COUNT_MASK;
1691        cflags |= CF_NOCACHE | 1;
1692    }
1693
1694    cflags &= ~CF_CLUSTER_MASK;
1695    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
1696
1697    max_insns = cflags & CF_COUNT_MASK;
1698    if (max_insns == 0) {
1699        max_insns = CF_COUNT_MASK;
1700    }
1701    if (max_insns > TCG_MAX_INSNS) {
1702        max_insns = TCG_MAX_INSNS;
1703    }
1704    if (cpu->singlestep_enabled || singlestep) {
1705        max_insns = 1;
1706    }
1707
1708 buffer_overflow:
1709    tb = tb_alloc(pc);
1710    if (unlikely(!tb)) {
1711        /* flush must be done */
1712        tb_flush(cpu);
1713        mmap_unlock();
1714        /* Make the execution loop process the flush as soon as possible.  */
1715        cpu->exception_index = EXCP_INTERRUPT;
1716        cpu_loop_exit(cpu);
1717    }
1718
1719    gen_code_buf = tcg_ctx->code_gen_ptr;
1720    tb->tc.ptr = gen_code_buf;
1721    tb->pc = pc;
1722    tb->cs_base = cs_base;
1723    tb->flags = flags;
1724    tb->cflags = cflags;
1725    tb->trace_vcpu_dstate = *cpu->trace_dstate;
1726    tcg_ctx->tb_cflags = cflags;
1727 tb_overflow:
1728
1729#ifdef CONFIG_PROFILER
1730    /* includes aborted translations because of exceptions */
1731    atomic_set(&prof->tb_count1, prof->tb_count1 + 1);
1732    ti = profile_getclock();
1733#endif
1734
1735    tcg_func_start(tcg_ctx);
1736
1737    tcg_ctx->cpu = env_cpu(env);
1738    gen_intermediate_code(cpu, tb, max_insns);
1739    tcg_ctx->cpu = NULL;
1740
1741    trace_translate_block(tb, tb->pc, tb->tc.ptr);
1742
1743    /* generate machine code */
1744    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
1745    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
1746    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
1747    if (TCG_TARGET_HAS_direct_jump) {
1748        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
1749        tcg_ctx->tb_jmp_target_addr = NULL;
1750    } else {
1751        tcg_ctx->tb_jmp_insn_offset = NULL;
1752        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
1753    }
1754
1755#ifdef CONFIG_PROFILER
1756    atomic_set(&prof->tb_count, prof->tb_count + 1);
1757    atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti);
1758    ti = profile_getclock();
1759#endif
1760
1761    gen_code_size = tcg_gen_code(tcg_ctx, tb);
1762    if (unlikely(gen_code_size < 0)) {
1763        switch (gen_code_size) {
1764        case -1:
1765            /*
1766             * Overflow of code_gen_buffer, or the current slice of it.
1767             *
1768             * TODO: We don't need to re-do gen_intermediate_code, nor
1769             * should we re-do the tcg optimization currently hidden
1770             * inside tcg_gen_code.  All that should be required is to
1771             * flush the TBs, allocate a new TB, re-initialize it per
1772             * above, and re-do the actual code generation.
1773             */
1774            goto buffer_overflow;
1775
1776        case -2:
1777            /*
1778             * The code generated for the TranslationBlock is too large.
1779             * The maximum size allowed by the unwind info is 64k.
1780             * There may be stricter constraints from relocations
1781             * in the tcg backend.
1782             *
1783             * Try again with half as many insns as we attempted this time.
1784             * If a single insn overflows, there's a bug somewhere...
1785             */
1786            max_insns = tb->icount;
1787            assert(max_insns > 1);
1788            max_insns /= 2;
1789            goto tb_overflow;
1790
1791        default:
1792            g_assert_not_reached();
1793        }
1794    }
1795    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
1796    if (unlikely(search_size < 0)) {
1797        goto buffer_overflow;
1798    }
1799    tb->tc.size = gen_code_size;
1800
1801#ifdef CONFIG_PROFILER
1802    atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
1803    atomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
1804    atomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
1805    atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
1806#endif
1807
1808#ifdef DEBUG_DISAS
1809    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
1810        qemu_log_in_addr_range(tb->pc)) {
1811        qemu_log_lock();
1812        qemu_log("OUT: [size=%d]\n", gen_code_size);
1813        if (tcg_ctx->data_gen_ptr) {
1814            size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
1815            size_t data_size = gen_code_size - code_size;
1816            size_t i;
1817
1818            log_disas(tb->tc.ptr, code_size);
1819
1820            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1821                if (sizeof(tcg_target_ulong) == 8) {
1822                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1823                             (uintptr_t)tcg_ctx->data_gen_ptr + i,
1824                             *(uint64_t *)(tcg_ctx->data_gen_ptr + i));
1825                } else {
1826                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1827                             (uintptr_t)tcg_ctx->data_gen_ptr + i,
1828                             *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
1829                }
1830            }
1831        } else {
1832            log_disas(tb->tc.ptr, gen_code_size);
1833        }
1834        qemu_log("\n");
1835        qemu_log_flush();
1836        qemu_log_unlock();
1837    }
1838#endif
1839
1840    atomic_set(&tcg_ctx->code_gen_ptr, (void *)
1841        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
1842                 CODE_GEN_ALIGN));
1843
1844    /* init jump list */
1845    qemu_spin_init(&tb->jmp_lock);
1846    tb->jmp_list_head = (uintptr_t)NULL;
1847    tb->jmp_list_next[0] = (uintptr_t)NULL;
1848    tb->jmp_list_next[1] = (uintptr_t)NULL;
1849    tb->jmp_dest[0] = (uintptr_t)NULL;
1850    tb->jmp_dest[1] = (uintptr_t)NULL;
1851
1852    /* init original jump addresses which have been set during tcg_gen_code() */
1853    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1854        tb_reset_jump(tb, 0);
1855    }
1856    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1857        tb_reset_jump(tb, 1);
1858    }
1859
1860    /* check next page if needed */
1861    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1862    phys_page2 = -1;
1863    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1864        phys_page2 = get_page_addr_code(env, virt_page2);
1865    }
1866    /*
1867     * No explicit memory barrier is required -- tb_link_page() makes the
1868     * TB visible in a consistent state.
1869     */
1870    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
1871    /* if the TB already exists, discard what we just translated */
1872    if (unlikely(existing_tb != tb)) {
1873        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1874
1875        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1876        atomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1877        return existing_tb;
1878    }
1879    tcg_tb_insert(tb);
1880    return tb;
1881}
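
/*
 * Illustrative sketch only, not part of the original file: roughly how a
 * caller in the cpu-exec path uses tb_gen_code() after a lookup miss.  The
 * state fetch and locking mirror the comments above; error handling and the
 * jump-cache update done by the real caller (tb_find() in cpu-exec.c) are
 * omitted.
 */
#if 0
static TranslationBlock *example_translate_miss(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    uint32_t flags;
    TranslationBlock *tb;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    mmap_lock();                /* user-mode: protects pages and TB lists */
    tb = tb_gen_code(cpu, pc, cs_base, flags, curr_cflags());
    mmap_unlock();

    return tb;                  /* may be a pre-existing, identical TB */
}
#endif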
1882
1883/*
1884 * @p must be non-NULL.
1885 * user-mode: call with mmap_lock held.
1886 * !user-mode: call with all @pages locked.
1887 */
1888static void
1889tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1890                                      PageDesc *p, tb_page_addr_t start,
1891                                      tb_page_addr_t end,
1892                                      int is_cpu_write_access)
1893{
1894    TranslationBlock *tb;
1895    tb_page_addr_t tb_start, tb_end;
1896    int n;
1897#ifdef TARGET_HAS_PRECISE_SMC
1898    CPUState *cpu = current_cpu;
1899    CPUArchState *env = NULL;
1900    int current_tb_not_found = is_cpu_write_access;
1901    TranslationBlock *current_tb = NULL;
1902    int current_tb_modified = 0;
1903    target_ulong current_pc = 0;
1904    target_ulong current_cs_base = 0;
1905    uint32_t current_flags = 0;
1906#endif /* TARGET_HAS_PRECISE_SMC */
1907
1908    assert_page_locked(p);
1909
1910#if defined(TARGET_HAS_PRECISE_SMC)
1911    if (cpu != NULL) {
1912        env = cpu->env_ptr;
1913    }
1914#endif
1915
1916    /* we remove all the TBs in the range [start, end[ */
1917    /* XXX: see if in some cases it could be faster to invalidate all
1918       the code */
1919    PAGE_FOR_EACH_TB(p, tb, n) {
1920        assert_page_locked(p);
1921        /* NOTE: this is subtle as a TB may span two physical pages */
1922        if (n == 0) {
1923            /* NOTE: tb_end may be after the end of the page, but
1924               it is not a problem */
1925            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1926            tb_end = tb_start + tb->size;
1927        } else {
1928            tb_start = tb->page_addr[1];
1929            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1930        }
1931        if (!(tb_end <= start || tb_start >= end)) {
1932#ifdef TARGET_HAS_PRECISE_SMC
1933            if (current_tb_not_found) {
1934                current_tb_not_found = 0;
1935                current_tb = NULL;
1936                if (cpu->mem_io_pc) {
1937                    /* now we have a real cpu fault */
1938                    current_tb = tcg_tb_lookup(cpu->mem_io_pc);
1939                }
1940            }
1941            if (current_tb == tb &&
1942                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1943                /* If we are modifying the current TB, we must stop
1944                its execution. We could be more precise by checking
1945                that the modification is after the current PC, but it
1946                would require a specialized function to partially
1947                restore the CPU state */
1948
1949                current_tb_modified = 1;
1950                cpu_restore_state_from_tb(cpu, current_tb,
1951                                          cpu->mem_io_pc, true);
1952                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1953                                     &current_flags);
1954            }
1955#endif /* TARGET_HAS_PRECISE_SMC */
1956            tb_phys_invalidate__locked(tb);
1957        }
1958    }
1959#if !defined(CONFIG_USER_ONLY)
1960    /* if no code remains, there is no need to keep using slow writes */
1961    if (!p->first_tb) {
1962        invalidate_page_bitmap(p);
1963        tlb_unprotect_code(start);
1964    }
1965#endif
1966#ifdef TARGET_HAS_PRECISE_SMC
1967    if (current_tb_modified) {
1968        page_collection_unlock(pages);
1969        /* Force execution of one insn next time.  */
1970        cpu->cflags_next_tb = 1 | curr_cflags();
1971        mmap_unlock();
1972        cpu_loop_exit_noexc(cpu);
1973    }
1974#endif
1975}
1976
1977/*
1978 * Invalidate all TBs which intersect with the target physical address range
1979 * [start, end[. NOTE: start and end must refer to the *same* physical page.
1980 * 'is_cpu_write_access' should be true if called from a real cpu write
1981 * access: the virtual CPU will exit the current TB if code is modified inside
1982 * this TB.
1983 *
1984 * Called with mmap_lock held for user-mode emulation
1985 */
1986void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1987                                   int is_cpu_write_access)
1988{
1989    struct page_collection *pages;
1990    PageDesc *p;
1991
1992    assert_memory_lock();
1993
1994    p = page_find(start >> TARGET_PAGE_BITS);
1995    if (p == NULL) {
1996        return;
1997    }
1998    pages = page_collection_lock(start, end);
1999    tb_invalidate_phys_page_range__locked(pages, p, start, end,
2000                                          is_cpu_write_access);
2001    page_collection_unlock(pages);
2002}
2003
2004/*
2005 * Invalidate all TBs which intersect with the target physical address range
2006 * [start, end[. NOTE: start and end may refer to *different* physical pages.
2007 * Unlike tb_invalidate_phys_page_range(), this function takes no
2008 * 'is_cpu_write_access' argument: the whole range is invalidated as if the
2009 * write did not come from the CPU itself.
2010 *
2011 * Called with mmap_lock held for user-mode emulation.
2012 */
2013#ifdef CONFIG_SOFTMMU
2014void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end)
2015#else
2016void tb_invalidate_phys_range(target_ulong start, target_ulong end)
2017#endif
2018{
2019    struct page_collection *pages;
2020    tb_page_addr_t next;
2021
2022    assert_memory_lock();
2023
2024    pages = page_collection_lock(start, end);
2025    for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
2026         start < end;
2027         start = next, next += TARGET_PAGE_SIZE) {
2028        PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
2029        tb_page_addr_t bound = MIN(next, end);
2030
2031        if (pd == NULL) {
2032            continue;
2033        }
2034        tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
2035    }
2036    page_collection_unlock(pages);
2037}
2038
2039#ifdef CONFIG_SOFTMMU
2040/* len must be <= 8 and start must be a multiple of len.
2041 * Called via softmmu_template.h when code areas are written to, with the
2042 * iothread mutex not held.
2043 *
2044 * Call with all @pages in the range [@start, @start + len[ locked.
2045 */
2046void tb_invalidate_phys_page_fast(struct page_collection *pages,
2047                                  tb_page_addr_t start, int len)
2048{
2049    PageDesc *p;
2050
2051    assert_memory_lock();
2052
2053    p = page_find(start >> TARGET_PAGE_BITS);
2054    if (!p) {
2055        return;
2056    }
2057
2058    assert_page_locked(p);
2059    if (!p->code_bitmap &&
2060        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
2061        build_page_bitmap(p);
2062    }
2063    if (p->code_bitmap) {
2064        unsigned int nr;
2065        unsigned long b;
2066
2067        nr = start & ~TARGET_PAGE_MASK;
2068        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
2069        if (b & ((1 << len) - 1)) {
2070            goto do_invalidate;
2071        }
2072    } else {
2073    do_invalidate:
2074        tb_invalidate_phys_page_range__locked(pages, p, start, start + len, 1);
2075    }
2076}
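
/*
 * Illustrative sketch only, not part of the original file: the code_bitmap
 * test above, pulled out as a standalone predicate.  Returns true when any
 * byte of the @len-byte write starting at @start overlaps a byte covered by
 * translated code on page @p.
 */
#if 0
static bool example_write_hits_code(PageDesc *p, tb_page_addr_t start, int len)
{
    unsigned int nr = start & ~TARGET_PAGE_MASK;
    unsigned long b = p->code_bitmap[BIT_WORD(nr)]
                      >> (nr & (BITS_PER_LONG - 1));

    return (b & ((1 << len) - 1)) != 0;
}
#endif
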
2077#else
2078/* Called with mmap_lock held. If pc is not 0 then it indicates the
2079 * host PC of the faulting store instruction that caused this invalidate.
2080 * Returns true if the caller needs to abort execution of the current
2081 * TB (because it was modified by this store and the guest CPU has
2082 * precise-SMC semantics).
2083 */
2084static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
2085{
2086    TranslationBlock *tb;
2087    PageDesc *p;
2088    int n;
2089#ifdef TARGET_HAS_PRECISE_SMC
2090    TranslationBlock *current_tb = NULL;
2091    CPUState *cpu = current_cpu;
2092    CPUArchState *env = NULL;
2093    int current_tb_modified = 0;
2094    target_ulong current_pc = 0;
2095    target_ulong current_cs_base = 0;
2096    uint32_t current_flags = 0;
2097#endif
2098
2099    assert_memory_lock();
2100
2101    addr &= TARGET_PAGE_MASK;
2102    p = page_find(addr >> TARGET_PAGE_BITS);
2103    if (!p) {
2104        return false;
2105    }
2106
2107#ifdef TARGET_HAS_PRECISE_SMC
2108    if (p->first_tb && pc != 0) {
2109        current_tb = tcg_tb_lookup(pc);
2110    }
2111    if (cpu != NULL) {
2112        env = cpu->env_ptr;
2113    }
2114#endif
2115    assert_page_locked(p);
2116    PAGE_FOR_EACH_TB(p, tb, n) {
2117#ifdef TARGET_HAS_PRECISE_SMC
2118        if (current_tb == tb &&
2119            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
2120            /* If we are modifying the current TB, we must stop
2121               its execution. We could be more precise by checking
2122               that the modification is after the current PC, but it
2123               would require a specialized function to partially
2124               restore the CPU state */
2125
2126            current_tb_modified = 1;
2127            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
2128            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
2129                                 &current_flags);
2130        }
2131#endif /* TARGET_HAS_PRECISE_SMC */
2132        tb_phys_invalidate(tb, addr);
2133    }
2134    p->first_tb = (uintptr_t)NULL;
2135#ifdef TARGET_HAS_PRECISE_SMC
2136    if (current_tb_modified) {
2137        /* Force execution of one insn next time.  */
2138        cpu->cflags_next_tb = 1 | curr_cflags();
2139        return true;
2140    }
2141#endif
2142
2143    return false;
2144}
2145#endif
2146
2147/* user-mode: call with mmap_lock held */
2148void tb_check_watchpoint(CPUState *cpu)
2149{
2150    TranslationBlock *tb;
2151
2152    assert_memory_lock();
2153
2154    tb = tcg_tb_lookup(cpu->mem_io_pc);
2155    if (tb) {
2156        /* We can use retranslation to find the PC.  */
2157        cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc, true);
2158        tb_phys_invalidate(tb, -1);
2159    } else {
2160        /* The exception probably happened in a helper.  The CPU state should
2161           have been saved before calling it. Fetch the PC from there.  */
2162        CPUArchState *env = cpu->env_ptr;
2163        target_ulong pc, cs_base;
2164        tb_page_addr_t addr;
2165        uint32_t flags;
2166
2167        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
2168        addr = get_page_addr_code(env, pc);
2169        if (addr != -1) {
2170            tb_invalidate_phys_range(addr, addr + 1);
2171        }
2172    }
2173}
2174
2175#ifndef CONFIG_USER_ONLY
2176/* In deterministic execution mode, instructions that perform device I/O
2177 * must be at the end of the TB.
2178 *
2179 * Called by softmmu_template.h, with iothread mutex not held.
2180 */
2181void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
2182{
2183#if defined(TARGET_MIPS) || defined(TARGET_SH4)
2184    CPUArchState *env = cpu->env_ptr;
2185#endif
2186    TranslationBlock *tb;
2187    uint32_t n;
2188
2189    tb = tcg_tb_lookup(retaddr);
2190    if (!tb) {
2191        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
2192                  (void *)retaddr);
2193    }
2194    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
2195
2196    /* On MIPS and SH, delay slot instructions can only be restarted if
2197       they were already the first instruction in the TB.  If this is not
2198       the first instruction in a TB then re-execute the preceding
2199       branch.  */
2200    n = 1;
2201#if defined(TARGET_MIPS)
2202    if ((env->hflags & MIPS_HFLAG_BMASK) != 0
2203        && env->active_tc.PC != tb->pc) {
2204        env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
2205        cpu_neg(cpu)->icount_decr.u16.low++;
2206        env->hflags &= ~MIPS_HFLAG_BMASK;
2207        n = 2;
2208    }
2209#elif defined(TARGET_SH4)
2210    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
2211        && env->pc != tb->pc) {
2212        env->pc -= 2;
2213        cpu_neg(cpu)->icount_decr.u16.low++;
2214        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
2215        n = 2;
2216    }
2217#endif
2218
2219    /* Generate a new TB executing the I/O insn.  */
2220    cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
2221
2222    if (tb_cflags(tb) & CF_NOCACHE) {
2223        if (tb->orig_tb) {
2224            /* Invalidate original TB if this TB was generated in
2225             * cpu_exec_nocache() */
2226            tb_phys_invalidate(tb->orig_tb, -1);
2227        }
2228        tcg_tb_remove(tb);
2229    }
2230
2231    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
2232     * the first in the TB) then we end up generating a whole new TB and
2233     * repeating the fault, which is horribly inefficient.
2234     * Better would be to execute just this insn uncached, or generate a
2235     * second new TB.
2236     */
2237    cpu_loop_exit_noexc(cpu);
2238}
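
/*
 * Illustrative sketch only, not part of the original file: how the execution
 * loop consumes the cpu->cflags_next_tb value set above.  The real hand-over
 * happens in tb_find()/cpu_exec() in accel/tcg/cpu-exec.c; this only shows
 * its shape.
 */
#if 0
static uint32_t example_next_tb_cflags(CPUState *cpu)
{
    uint32_t cflags = cpu->cflags_next_tb;

    if (cflags == -1) {
        /* nothing pending: use the normal cflags for this vCPU */
        cflags = curr_cflags();
    } else {
        /* one-shot request, e.g. CF_LAST_IO | n from cpu_io_recompile() */
        cpu->cflags_next_tb = -1;
    }
    return cflags;
}
#endif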
2239
2240static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
2241{
2242    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
2243
2244    for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
2245        atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
2246    }
2247}
2248
2249void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
2250{
2251    /* Discard jump cache entries for any tb which might overlap the
2252       flushed page.  */
2253    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
2254    tb_jmp_cache_clear_page(cpu, addr);
2255}
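
/*
 * Worked example for the two tb_jmp_cache_clear_page() calls above
 * (illustrative only, assuming @addr is the page-aligned start of a 4 KiB
 * flushed page): a TB whose pc sits near the end of the preceding page can
 * spill into the flushed page,
 *
 *   tb->pc        = addr - 0x10     (last bytes of the previous page)
 *   tb->pc + size = addr + 0x10     (first bytes of the flushed page)
 *
 * and its jump cache slot is hashed from tb->pc, i.e. from the *previous*
 * page.  Clearing both hash pages therefore drops every cached entry that
 * could still point at invalidated code.
 */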
2256
2257static void print_qht_statistics(struct qht_stats hst)
2258{
2259    uint32_t hgram_opts;
2260    size_t hgram_bins;
2261    char *hgram;
2262
2263    if (!hst.head_buckets) {
2264        return;
2265    }
2266    qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
2267                hst.used_head_buckets, hst.head_buckets,
2268                (double)hst.used_head_buckets / hst.head_buckets * 100);
2269
2270    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
2271    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
2272    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
2273        hgram_opts |= QDIST_PR_NODECIMAL;
2274    }
2275    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
2276    qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
2277                qdist_avg(&hst.occupancy) * 100, hgram);
2278    g_free(hgram);
2279
2280    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
2281    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
2282    if (hgram_bins > 10) {
2283        hgram_bins = 10;
2284    } else {
2285        hgram_bins = 0;
2286        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
2287    }
2288    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
2289    qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
2290                qdist_avg(&hst.chain), hgram);
2291    g_free(hgram);
2292}
2293
2294struct tb_tree_stats {
2295    size_t nb_tbs;
2296    size_t host_size;
2297    size_t target_size;
2298    size_t max_target_size;
2299    size_t direct_jmp_count;
2300    size_t direct_jmp2_count;
2301    size_t cross_page;
2302};
2303
2304static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
2305{
2306    const TranslationBlock *tb = value;
2307    struct tb_tree_stats *tst = data;
2308
2309    tst->nb_tbs++;
2310    tst->host_size += tb->tc.size;
2311    tst->target_size += tb->size;
2312    if (tb->size > tst->max_target_size) {
2313        tst->max_target_size = tb->size;
2314    }
2315    if (tb->page_addr[1] != -1) {
2316        tst->cross_page++;
2317    }
2318    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
2319        tst->direct_jmp_count++;
2320        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
2321            tst->direct_jmp2_count++;
2322        }
2323    }
2324    return false;
2325}
2326
2327void dump_exec_info(void)
2328{
2329    struct tb_tree_stats tst = {};
2330    struct qht_stats hst;
2331    size_t nb_tbs, flush_full, flush_part, flush_elide;
2332
2333    tcg_tb_foreach(tb_tree_stats_iter, &tst);
2334    nb_tbs = tst.nb_tbs;
2335    /* XXX: avoid using doubles? */
2336    qemu_printf("Translation buffer state:\n");
2337    /*
2338     * Report total code size including the padding and TB structs;
2339     * otherwise users might think "-tb-size" is not honoured.
2340     * For avg host size we use the precise numbers from tb_tree_stats though.
2341     */
2342    qemu_printf("gen code size       %zu/%zu\n",
2343                tcg_code_size(), tcg_code_capacity());
2344    qemu_printf("TB count            %zu\n", nb_tbs);
2345    qemu_printf("TB avg target size  %zu max=%zu bytes\n",
2346                nb_tbs ? tst.target_size / nb_tbs : 0,
2347                tst.max_target_size);
2348    qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
2349                nb_tbs ? tst.host_size / nb_tbs : 0,
2350                tst.target_size ? (double)tst.host_size / tst.target_size : 0);
2351    qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
2352                nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
2353    qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
2354                tst.direct_jmp_count,
2355                nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
2356                tst.direct_jmp2_count,
2357                nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
2358
2359    qht_statistics_init(&tb_ctx.htable, &hst);
2360    print_qht_statistics(hst);
2361    qht_statistics_destroy(&hst);
2362
2363    qemu_printf("\nStatistics:\n");
2364    qemu_printf("TB flush count      %u\n",
2365                atomic_read(&tb_ctx.tb_flush_count));
2366    qemu_printf("TB invalidate count %zu\n",
2367                tcg_tb_phys_invalidate_count());
2368
2369    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
2370    qemu_printf("TLB full flushes    %zu\n", flush_full);
2371    qemu_printf("TLB partial flushes %zu\n", flush_part);
2372    qemu_printf("TLB elided flushes  %zu\n", flush_elide);
2373    tcg_dump_info();
2374}
2375
2376void dump_opcount_info(void)
2377{
2378    tcg_dump_op_count();
2379}
2380
2381#else /* CONFIG_USER_ONLY */
2382
2383void cpu_interrupt(CPUState *cpu, int mask)
2384{
2385    g_assert(qemu_mutex_iothread_locked());
2386    cpu->interrupt_request |= mask;
2387    atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
2388}
2389
2390/*
2391 * Walks guest process memory "regions" one by one
2392 * and calls callback function 'fn' for each region.
2393 */
2394struct walk_memory_regions_data {
2395    walk_memory_regions_fn fn;
2396    void *priv;
2397    target_ulong start;
2398    int prot;
2399};
2400
2401static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2402                                   target_ulong end, int new_prot)
2403{
2404    if (data->start != -1u) {
2405        int rc = data->fn(data->priv, data->start, end, data->prot);
2406        if (rc != 0) {
2407            return rc;
2408        }
2409    }
2410
2411    data->start = (new_prot ? end : -1u);
2412    data->prot = new_prot;
2413
2414    return 0;
2415}
2416
2417static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2418                                 target_ulong base, int level, void **lp)
2419{
2420    target_ulong pa;
2421    int i, rc;
2422
2423    if (*lp == NULL) {
2424        return walk_memory_regions_end(data, base, 0);
2425    }
2426
2427    if (level == 0) {
2428        PageDesc *pd = *lp;
2429
2430        for (i = 0; i < V_L2_SIZE; ++i) {
2431            int prot = pd[i].flags;
2432
2433            pa = base | (i << TARGET_PAGE_BITS);
2434            if (prot != data->prot) {
2435                rc = walk_memory_regions_end(data, pa, prot);
2436                if (rc != 0) {
2437                    return rc;
2438                }
2439            }
2440        }
2441    } else {
2442        void **pp = *lp;
2443
2444        for (i = 0; i < V_L2_SIZE; ++i) {
2445            pa = base | ((target_ulong)i <<
2446                (TARGET_PAGE_BITS + V_L2_BITS * level));
2447            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2448            if (rc != 0) {
2449                return rc;
2450            }
2451        }
2452    }
2453
2454    return 0;
2455}
2456
2457int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2458{
2459    struct walk_memory_regions_data data;
2460    uintptr_t i, l1_sz = v_l1_size;
2461
2462    data.fn = fn;
2463    data.priv = priv;
2464    data.start = -1u;
2465    data.prot = 0;
2466
2467    for (i = 0; i < l1_sz; i++) {
2468        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
2469        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
2470        if (rc != 0) {
2471            return rc;
2472        }
2473    }
2474
2475    return walk_memory_regions_end(&data, 0, 0);
2476}
2477
2478static int dump_region(void *priv, target_ulong start,
2479    target_ulong end, unsigned long prot)
2480{
2481    FILE *f = (FILE *)priv;
2482
2483    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
2484        " "TARGET_FMT_lx" %c%c%c\n",
2485        start, end, end - start,
2486        ((prot & PAGE_READ) ? 'r' : '-'),
2487        ((prot & PAGE_WRITE) ? 'w' : '-'),
2488        ((prot & PAGE_EXEC) ? 'x' : '-'));
2489
2490    return 0;
2491}
2492
2493/* dump memory mappings */
2494void page_dump(FILE *f)
2495{
2496    const int length = sizeof(target_ulong) * 2;
2497    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
2498            length, "start", length, "end", length, "size", "prot");
2499    walk_memory_regions(f, dump_region);
2500}
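
/*
 * Illustrative sketch only, not part of the original file: another
 * walk_memory_regions() callback, counting executable bytes instead of
 * printing them.  The signature matches the dump_region() callback above.
 */
#if 0
static int example_count_exec(void *priv, target_ulong start,
                              target_ulong end, unsigned long prot)
{
    uint64_t *total = priv;

    if (prot & PAGE_EXEC) {
        *total += end - start;
    }
    return 0;           /* a non-zero return would stop the walk early */
}

/* usage: uint64_t bytes = 0; walk_memory_regions(&bytes, example_count_exec); */
#endif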
2501
2502int page_get_flags(target_ulong address)
2503{
2504    PageDesc *p;
2505
2506    p = page_find(address >> TARGET_PAGE_BITS);
2507    if (!p) {
2508        return 0;
2509    }
2510    return p->flags;
2511}
2512
2513/* Modify the flags of a page and invalidate the code if necessary.
2514   The flag PAGE_WRITE_ORG is set automatically depending
2515   on PAGE_WRITE.  The mmap_lock should already be held.  */
2516void page_set_flags(target_ulong start, target_ulong end, int flags)
2517{
2518    target_ulong addr, len;
2519
2520    /* This function should never be called with addresses outside the
2521       guest address space.  If this assert fires, it probably indicates
2522       a missing call to h2g_valid.  */
2523#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2524    assert(end <= ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2525#endif
2526    assert(start < end);
2527    assert_memory_lock();
2528
2529    start = start & TARGET_PAGE_MASK;
2530    end = TARGET_PAGE_ALIGN(end);
2531
2532    if (flags & PAGE_WRITE) {
2533        flags |= PAGE_WRITE_ORG;
2534    }
2535
2536    for (addr = start, len = end - start;
2537         len != 0;
2538         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2539        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2540
2541        /* If the page is being made writable and it still contains
2542           translated code, invalidate that code.  */
2543        if (!(p->flags & PAGE_WRITE) &&
2544            (flags & PAGE_WRITE) &&
2545            p->first_tb) {
2546            tb_invalidate_phys_page(addr, 0);
2547        }
2548        p->flags = flags;
2549    }
2550}
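
/*
 * Illustrative sketch only, not part of the original file: how a linux-user
 * style mmap path publishes a new mapping's protection via page_set_flags()
 * (one real caller is target_mmap() in linux-user/mmap.c).  @start and @len
 * are assumed to be target-page aligned already.
 */
#if 0
static void example_register_mapping(target_ulong start, target_ulong len,
                                     int host_prot)
{
    int flags = PAGE_VALID;

    if (host_prot & PROT_READ) {
        flags |= PAGE_READ;
    }
    if (host_prot & PROT_WRITE) {
        flags |= PAGE_WRITE;    /* PAGE_WRITE_ORG is added by page_set_flags */
    }
    if (host_prot & PROT_EXEC) {
        flags |= PAGE_EXEC;
    }
    page_set_flags(start, start + len, flags);
}
#endif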
2551
2552int page_check_range(target_ulong start, target_ulong len, int flags)
2553{
2554    PageDesc *p;
2555    target_ulong end;
2556    target_ulong addr;
2557
2558    /* This function should never be called with addresses outside the
2559       guest address space.  If this assert fires, it probably indicates
2560       a missing call to h2g_valid.  */
2561#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2562    assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2563#endif
2564
2565    if (len == 0) {
2566        return 0;
2567    }
2568    if (start + len - 1 < start) {
2569        /* We've wrapped around.  */
2570        return -1;
2571    }
2572
2573    /* must do this before we lose bits in the next step */
2574    end = TARGET_PAGE_ALIGN(start + len);
2575    start = start & TARGET_PAGE_MASK;
2576
2577    for (addr = start, len = end - start;
2578         len != 0;
2579         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2580        p = page_find(addr >> TARGET_PAGE_BITS);
2581        if (!p) {
2582            return -1;
2583        }
2584        if (!(p->flags & PAGE_VALID)) {
2585            return -1;
2586        }
2587
2588        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2589            return -1;
2590        }
2591        if (flags & PAGE_WRITE) {
2592            if (!(p->flags & PAGE_WRITE_ORG)) {
2593                return -1;
2594            }
2595            /* unprotect the page if it was made read-only because it
2596               contains translated code */
2597            if (!(p->flags & PAGE_WRITE)) {
2598                if (!page_unprotect(addr, 0)) {
2599                    return -1;
2600                }
2601            }
2602        }
2603    }
2604    return 0;
2605}
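
/*
 * Illustrative sketch only, not part of the original file: a guest-pointer
 * validity check in the style of linux-user's access_ok(), built on the
 * page_check_range() return convention above (0 on success, -1 on failure).
 */
#if 0
static bool example_guest_range_ok(target_ulong addr, target_ulong size,
                                   int required_flags)
{
    return page_check_range(addr, size, required_flags) == 0;
}
#endif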
2606
2607/* called from signal handler: invalidate the code and unprotect the
2608 * page. Return 0 if the fault was not handled, 1 if it was handled,
2609 * and 2 if it was handled but the caller must cause the TB to be
2610 * immediately exited. (We can only return 2 if the 'pc' argument is
2611 * non-zero.)
2612 */
2613int page_unprotect(target_ulong address, uintptr_t pc)
2614{
2615    unsigned int prot;
2616    bool current_tb_invalidated;
2617    PageDesc *p;
2618    target_ulong host_start, host_end, addr;
2619
2620    /* Technically this isn't safe inside a signal handler.  However we
2621       know this only ever happens in a synchronous SEGV handler, so in
2622       practice it seems to be ok.  */
2623    mmap_lock();
2624
2625    p = page_find(address >> TARGET_PAGE_BITS);
2626    if (!p) {
2627        mmap_unlock();
2628        return 0;
2629    }
2630
2631    /* if the page was really writable, then we change its
2632       protection back to writable */
2633    if (p->flags & PAGE_WRITE_ORG) {
2634        current_tb_invalidated = false;
2635        if (p->flags & PAGE_WRITE) {
2636            /* If the page is actually marked WRITE then assume this is because
2637             * this thread raced with another one which got here first and
2638             * set the page to PAGE_WRITE and did the TB invalidate for us.
2639             */
2640#ifdef TARGET_HAS_PRECISE_SMC
2641            TranslationBlock *current_tb = tcg_tb_lookup(pc);
2642            if (current_tb) {
2643                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
2644            }
2645#endif
2646        } else {
2647            host_start = address & qemu_host_page_mask;
2648            host_end = host_start + qemu_host_page_size;
2649
2650            prot = 0;
2651            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
2652                p = page_find(addr >> TARGET_PAGE_BITS);
2653                p->flags |= PAGE_WRITE;
2654                prot |= p->flags;
2655
2656                /* and since the content will be modified, we must invalidate
2657                   the corresponding translated code. */
2658                current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
2659#ifdef CONFIG_USER_ONLY
2660                if (DEBUG_TB_CHECK_GATE) {
2661                    tb_invalidate_check(addr);
2662                }
2663#endif
2664            }
2665            mprotect((void *)g2h(host_start), qemu_host_page_size,
2666                     prot & PAGE_BITS);
2667        }
2668        mmap_unlock();
2669    /* If the current TB was invalidated, return to the main loop */
2670        return current_tb_invalidated ? 2 : 1;
2671    }
2672    mmap_unlock();
2673    return 0;
2674}
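
/*
 * Illustrative sketch only, not part of the original file: how a SEGV
 * handler in the user-mode loop might consume page_unprotect()'s return
 * value, per the comment above.  The real consumer is handle_cpu_signal()
 * in accel/tcg/user-exec.c; @guest_addr and @host_pc come from the signal
 * frame there.
 */
#if 0
static void example_handle_write_fault(CPUState *cpu, target_ulong guest_addr,
                                       uintptr_t host_pc)
{
    switch (page_unprotect(guest_addr, host_pc)) {
    case 0:     /* not a write-protected code page: a genuine guest fault */
        /* deliver the guest signal here */
        break;
    case 1:     /* handled: simply restart the faulting store */
        break;
    case 2:     /* handled, but the current TB was invalidated under us */
        cpu_loop_exit_noexc(cpu);
        break;
    }
}
#endif
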
2675#endif /* CONFIG_USER_ONLY */
2676
2677/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
2678void tcg_flush_softmmu_tlb(CPUState *cs)
2679{
2680#ifdef CONFIG_SOFTMMU
2681    tlb_flush(cs);
2682#endif
2683}
2684