qemu/accel/tcg/translate-all.c
   1/*
   2 *  Host code generation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21
  22#define NO_CPU_IO_DEFS
  23#include "trace.h"
  24#include "disas/disas.h"
  25#include "exec/exec-all.h"
  26#include "tcg/tcg.h"
  27#if defined(CONFIG_USER_ONLY)
  28#include "qemu.h"
  29#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  30#include <sys/param.h>
  31#if __FreeBSD_version >= 700104
  32#define HAVE_KINFO_GETVMMAP
  33#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  34#include <sys/proc.h>
  35#include <machine/profile.h>
  36#define _KERNEL
  37#include <sys/user.h>
  38#undef _KERNEL
  39#undef sigqueue
  40#include <libutil.h>
  41#endif
  42#endif
  43#else
  44#include "exec/ram_addr.h"
  45#endif
  46
  47#include "exec/cputlb.h"
  48#include "exec/translate-all.h"
  49#include "exec/translator.h"
  50#include "qemu/bitmap.h"
  51#include "qemu/qemu-print.h"
  52#include "qemu/timer.h"
  53#include "qemu/main-loop.h"
  54#include "qemu/cacheinfo.h"
  55#include "exec/log.h"
  56#include "sysemu/cpus.h"
  57#include "sysemu/cpu-timers.h"
  58#include "sysemu/tcg.h"
  59#include "qapi/error.h"
  60#include "hw/core/tcg-cpu-ops.h"
  61#include "tb-jmp-cache.h"
  62#include "tb-hash.h"
  63#include "tb-context.h"
  64#include "internal.h"
  65
  66/* make various TB consistency checks */
  67
  68/**
  69 * struct page_entry - page descriptor entry
  70 * @pd:     pointer to the &struct PageDesc of the page this entry represents
  71 * @index:  page index of the page
  72 * @locked: whether the page is locked
  73 *
  74 * This struct helps us keep track of the locked state of a page, without
  75 * bloating &struct PageDesc.
  76 *
  77 * A page lock protects accesses to all fields of &struct PageDesc.
  78 *
  79 * See also: &struct page_collection.
  80 */
  81struct page_entry {
  82    PageDesc *pd;
  83    tb_page_addr_t index;
  84    bool locked;
  85};
  86
  87/**
  88 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
  89 * @tree:   Binary search tree (BST) of the pages, with key == page index
  90 * @max:    Pointer to the page in @tree with the highest page index
  91 *
  92 * To avoid deadlock we lock pages in ascending order of page index.
  93 * When operating on a set of pages, we need to keep track of them so that
  94 * we can lock them in order and also unlock them later. For this we collect
  95 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
  96 * @tree implementation we use does not provide an O(1) operation to obtain the
  97 * highest-ranked element, we use @max to keep track of the inserted page
  98 * with the highest index. This is valuable because if a page is not in
  99 * the tree and its index is higher than @max's, then we can lock it
 100 * without breaking the locking order rule.
 101 *
 102 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 103 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 104 *
 105 * See also: page_collection_lock().
 106 */
 107struct page_collection {
 108    GTree *tree;
 109    struct page_entry *max;
 110};
 111
 112/*
 113 * In system mode we want L1_MAP to be based on ram offsets,
 114 * while in user mode we want it to be based on virtual addresses.
 115 *
 116 * TODO: For user mode, see the caveat re host vs guest virtual
 117 * address spaces near GUEST_ADDR_MAX.
 118 */
 119#if !defined(CONFIG_USER_ONLY)
 120#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 121# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 122#else
 123# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 124#endif
 125#else
 126# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
 127#endif
 128
 129/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
 130QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
 131                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
 132                  * BITS_PER_BYTE);
 133
 134/*
 135 * L1 Mapping properties
 136 */
 137int v_l1_size;
 138int v_l1_shift;
 139int v_l2_levels;
 140
 141void *l1_map[V_L1_MAX_SIZE];
 142
 143TBContext tb_ctx;
 144
 145static void page_table_config_init(void)
 146{
 147    uint32_t v_l1_bits;
 148
 149    assert(TARGET_PAGE_BITS);
 150    /* The bits remaining after N lower levels of page tables.  */
 151    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
 152    if (v_l1_bits < V_L1_MIN_BITS) {
 153        v_l1_bits += V_L2_BITS;
 154    }
 155
 156    v_l1_size = 1 << v_l1_bits;
 157    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
 158    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
 159
 160    assert(v_l1_bits <= V_L1_MAX_BITS);
 161    assert(v_l1_shift % V_L2_BITS == 0);
 162    assert(v_l2_levels >= 0);
 163}
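     /*
      * Worked example, assuming the usual constants from internal.h
      * (V_L2_BITS == 10, V_L1_MIN_BITS == 4), TARGET_PAGE_BITS == 12 and
      * L1_MAP_ADDR_SPACE_BITS == 64:
      *
      *   v_l1_bits   = (64 - 12) % 10 = 2, below V_L1_MIN_BITS, so 12
      *   v_l1_size   = 1 << 12 = 4096 l1_map entries
      *   v_l1_shift  = 64 - 12 - 12 = 40
      *   v_l2_levels = 40 / 10 - 1 = 3 intermediate levels of V_L2_SIZE
      *                 entries each, with the PageDesc leaf level below.
      */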
 164
 165/* Encode VAL as a signed leb128 sequence at P.
 166   Return P incremented past the encoded value.  */
 167static uint8_t *encode_sleb128(uint8_t *p, target_long val)
 168{
 169    int more, byte;
 170
 171    do {
 172        byte = val & 0x7f;
 173        val >>= 7;
 174        more = !((val == 0 && (byte & 0x40) == 0)
 175                 || (val == -1 && (byte & 0x40) != 0));
 176        if (more) {
 177            byte |= 0x80;
 178        }
 179        *p++ = byte;
 180    } while (more);
 181
 182    return p;
 183}
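     /*
      * A few example encodings: 63 encodes as { 0x3f }, 128 as
      * { 0x80, 0x01 }, and -2 as { 0x7e }.  decode_sleb128() below is the
      * exact inverse, so values round-trip unchanged.
      */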
 184
 185/* Decode a signed leb128 sequence at *PP; increment *PP past the
 186   decoded value.  Return the decoded value.  */
 187static target_long decode_sleb128(const uint8_t **pp)
 188{
 189    const uint8_t *p = *pp;
 190    target_long val = 0;
 191    int byte, shift = 0;
 192
 193    do {
 194        byte = *p++;
 195        val |= (target_ulong)(byte & 0x7f) << shift;
 196        shift += 7;
 197    } while (byte & 0x80);
 198    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
 199        val |= -(target_ulong)1 << shift;
 200    }
 201
 202    *pp = p;
 203    return val;
 204}
 205
 206/* Encode the data collected about the instructions while compiling TB.
 207   Place the data at BLOCK, and return the number of bytes consumed.
 208
 209   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
 210   which come from the target's insn_start data, followed by a uintptr_t
 211   which comes from the host pc of the end of the code implementing the insn.
 212
 213   Each line of the table is encoded as sleb128 deltas from the previous
 214   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
 215   That is, the first column is seeded with the guest pc, the last column
 216   with the host pc, and the middle columns with zeros.  */
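     /*
      * Illustrative example, assuming TARGET_INSN_START_WORDS == 1 and
      * ignoring TARGET_TB_PCREL: a TB of three insns is logically the table
      *
      *      guest_pc(insn0)   host_end_pc(insn0)
      *      guest_pc(insn1)   host_end_pc(insn1)
      *      guest_pc(insn2)   host_end_pc(insn2)
      *
      * and what encode_search() stores is each row minus the row above it
      * (the first row diffed against the seed { tb->pc, tb->tc.ptr }),
      * every delta encoded with encode_sleb128() above.
      */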
 217
 218static int encode_search(TranslationBlock *tb, uint8_t *block)
 219{
 220    uint8_t *highwater = tcg_ctx->code_gen_highwater;
 221    uint8_t *p = block;
 222    int i, j, n;
 223
 224    for (i = 0, n = tb->icount; i < n; ++i) {
 225        target_ulong prev;
 226
 227        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 228            if (i == 0) {
 229                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
 230            } else {
 231                prev = tcg_ctx->gen_insn_data[i - 1][j];
 232            }
 233            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
 234        }
 235        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
 236        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
 237
 238        /* Test for (pending) buffer overflow.  The assumption is that any
 239           one row beginning below the high water mark cannot overrun
 240           the buffer completely.  Thus we can test for overflow after
 241           encoding a row without having to check during encoding.  */
 242        if (unlikely(p > highwater)) {
 243            return -1;
 244        }
 245    }
 246
 247    return p - block;
 248}
 249
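     /*
      * Walk the search data stored after the generated code of @tb,
      * accumulating the per-insn start words into @data, until the
      * recorded end of an insn's host code passes @host_pc.  Returns the
      * number of guest insns not yet completed, counting the one
      * containing @host_pc, or -1 if @host_pc does not fall within this
      * TB's generated code.
      */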
 250static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
 251                                   uint64_t *data)
 252{
 253    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
 254    const uint8_t *p = tb->tc.ptr + tb->tc.size;
 255    int i, j, num_insns = tb->icount;
 256
 257    host_pc -= GETPC_ADJ;
 258
 259    if (host_pc < iter_pc) {
 260        return -1;
 261    }
 262
 263    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
 264    if (!TARGET_TB_PCREL) {
 265        data[0] = tb_pc(tb);
 266    }
 267
 268    /*
 269     * Reconstruct the stored insn data while looking for the point
 270     * at which the end of the insn exceeds host_pc.
 271     */
 272    for (i = 0; i < num_insns; ++i) {
 273        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 274            data[j] += decode_sleb128(&p);
 275        }
 276        iter_pc += decode_sleb128(&p);
 277        if (iter_pc > host_pc) {
 278            return num_insns - i;
 279        }
 280    }
 281    return -1;
 282}
 283
 284/*
 285 * The cpu state corresponding to 'host_pc' is restored in
 286 * preparation for exiting the TB.
 287 */
 288void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 289                               uintptr_t host_pc)
 290{
 291    uint64_t data[TARGET_INSN_START_WORDS];
 292#ifdef CONFIG_PROFILER
 293    TCGProfile *prof = &tcg_ctx->prof;
 294    int64_t ti = profile_getclock();
 295#endif
 296    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 297
 298    if (insns_left < 0) {
 299        return;
 300    }
 301
 302    if (tb_cflags(tb) & CF_USE_ICOUNT) {
 303        assert(icount_enabled());
 304        /*
 305         * Reset the cycle counter to the start of the block and
  306         * shift it to the number of actually executed instructions.
 307         */
 308        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
 309    }
 310
 311    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
 312
 313#ifdef CONFIG_PROFILER
 314    qatomic_set(&prof->restore_time,
 315                prof->restore_time + profile_getclock() - ti);
 316    qatomic_set(&prof->restore_count, prof->restore_count + 1);
 317#endif
 318}
 319
 320bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
 321{
 322    /*
 323     * The host_pc has to be in the rx region of the code buffer.
 324     * If it is not we will not be able to resolve it here.
 325     * The two cases where host_pc will not be correct are:
 326     *
 327     *  - fault during translation (instruction fetch)
 328     *  - fault from helper (not using GETPC() macro)
 329     *
  330     * Either way we need to return early as we can't resolve it here.
 331     */
 332    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 333        TranslationBlock *tb = tcg_tb_lookup(host_pc);
 334        if (tb) {
 335            cpu_restore_state_from_tb(cpu, tb, host_pc);
 336            return true;
 337        }
 338    }
 339    return false;
 340}
 341
 342bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
 343{
 344    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 345        TranslationBlock *tb = tcg_tb_lookup(host_pc);
 346        if (tb) {
 347            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
 348        }
 349    }
 350    return false;
 351}
 352
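     /*
      * One-time setup of page tracking: determine the host page size and
      * the l1_map geometry and, for BSD user-mode builds, mark host
      * mappings that already exist as PAGE_RESERVED so the guest cannot
      * map over them.
      */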
 353void page_init(void)
 354{
 355    page_size_init();
 356    page_table_config_init();
 357
 358#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 359    {
 360#ifdef HAVE_KINFO_GETVMMAP
 361        struct kinfo_vmentry *freep;
 362        int i, cnt;
 363
 364        freep = kinfo_getvmmap(getpid(), &cnt);
 365        if (freep) {
 366            mmap_lock();
 367            for (i = 0; i < cnt; i++) {
 368                unsigned long startaddr, endaddr;
 369
 370                startaddr = freep[i].kve_start;
 371                endaddr = freep[i].kve_end;
 372                if (h2g_valid(startaddr)) {
 373                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 374
 375                    if (h2g_valid(endaddr)) {
 376                        endaddr = h2g(endaddr);
 377                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 378                    } else {
 379#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 380                        endaddr = ~0ul;
 381                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 382#endif
 383                    }
 384                }
 385            }
 386            free(freep);
 387            mmap_unlock();
 388        }
 389#else
 390        FILE *f;
 391
 392        last_brk = (unsigned long)sbrk(0);
 393
 394        f = fopen("/compat/linux/proc/self/maps", "r");
 395        if (f) {
 396            mmap_lock();
 397
 398            do {
 399                unsigned long startaddr, endaddr;
 400                int n;
 401
 402                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 403
 404                if (n == 2 && h2g_valid(startaddr)) {
 405                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 406
 407                    if (h2g_valid(endaddr)) {
 408                        endaddr = h2g(endaddr);
 409                    } else {
 410                        endaddr = ~0ul;
 411                    }
 412                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 413                }
 414            } while (!feof(f));
 415
 416            fclose(f);
 417            mmap_unlock();
 418        }
 419#endif
 420    }
 421#endif
 422}
 423
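     /*
      * Look up the PageDesc for page @index in the l1_map radix tree,
      * allocating any missing intermediate levels when @alloc is true.
      * Lookups are lock-free (RCU reads); racing allocators are resolved
      * with cmpxchg and the loser frees its copy.
      *
      * Sketch of the index split, assuming the example geometry computed
      * by page_table_config_init() above: the top v_l1_bits of @index
      * select the l1_map slot, each following V_L2_BITS chunk selects an
      * entry in an intermediate level, and the low V_L2_BITS select the
      * PageDesc within the leaf array.
      */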
 424PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
 425{
 426    PageDesc *pd;
 427    void **lp;
 428    int i;
 429
 430    /* Level 1.  Always allocated.  */
 431    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
 432
 433    /* Level 2..N-1.  */
 434    for (i = v_l2_levels; i > 0; i--) {
 435        void **p = qatomic_rcu_read(lp);
 436
 437        if (p == NULL) {
 438            void *existing;
 439
 440            if (!alloc) {
 441                return NULL;
 442            }
 443            p = g_new0(void *, V_L2_SIZE);
 444            existing = qatomic_cmpxchg(lp, NULL, p);
 445            if (unlikely(existing)) {
 446                g_free(p);
 447                p = existing;
 448            }
 449        }
 450
 451        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
 452    }
 453
 454    pd = qatomic_rcu_read(lp);
 455    if (pd == NULL) {
 456        void *existing;
 457
 458        if (!alloc) {
 459            return NULL;
 460        }
 461        pd = g_new0(PageDesc, V_L2_SIZE);
 462#ifndef CONFIG_USER_ONLY
 463        {
 464            int i;
 465
 466            for (i = 0; i < V_L2_SIZE; i++) {
 467                qemu_spin_init(&pd[i].lock);
 468            }
 469        }
 470#endif
 471        existing = qatomic_cmpxchg(lp, NULL, pd);
 472        if (unlikely(existing)) {
 473#ifndef CONFIG_USER_ONLY
 474            {
 475                int i;
 476
 477                for (i = 0; i < V_L2_SIZE; i++) {
 478                    qemu_spin_destroy(&pd[i].lock);
 479                }
 480            }
 481#endif
 482            g_free(pd);
 483            pd = existing;
 484        }
 485    }
 486
 487    return pd + (index & (V_L2_SIZE - 1));
 488}
 489
  490/* In user mode, page locks aren't used; mmap_lock is enough */
 491#ifdef CONFIG_USER_ONLY
 492struct page_collection *
 493page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 494{
 495    return NULL;
 496}
 497
 498void page_collection_unlock(struct page_collection *set)
 499{ }
 500#else /* !CONFIG_USER_ONLY */
 501
 502#ifdef CONFIG_DEBUG_TCG
 503
 504static __thread GHashTable *ht_pages_locked_debug;
 505
 506static void ht_pages_locked_debug_init(void)
 507{
 508    if (ht_pages_locked_debug) {
 509        return;
 510    }
 511    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
 512}
 513
 514static bool page_is_locked(const PageDesc *pd)
 515{
 516    PageDesc *found;
 517
 518    ht_pages_locked_debug_init();
 519    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
 520    return !!found;
 521}
 522
 523static void page_lock__debug(PageDesc *pd)
 524{
 525    ht_pages_locked_debug_init();
 526    g_assert(!page_is_locked(pd));
 527    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
 528}
 529
 530static void page_unlock__debug(const PageDesc *pd)
 531{
 532    bool removed;
 533
 534    ht_pages_locked_debug_init();
 535    g_assert(page_is_locked(pd));
 536    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
 537    g_assert(removed);
 538}
 539
 540void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
 541{
 542    if (unlikely(!page_is_locked(pd))) {
 543        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
 544                     pd, file, line);
 545        abort();
 546    }
 547}
 548
 549void assert_no_pages_locked(void)
 550{
 551    ht_pages_locked_debug_init();
 552    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
 553}
 554
 555#else /* !CONFIG_DEBUG_TCG */
 556
 557static inline void page_lock__debug(const PageDesc *pd) { }
 558static inline void page_unlock__debug(const PageDesc *pd) { }
 559
 560#endif /* CONFIG_DEBUG_TCG */
 561
 562void page_lock(PageDesc *pd)
 563{
 564    page_lock__debug(pd);
 565    qemu_spin_lock(&pd->lock);
 566}
 567
 568void page_unlock(PageDesc *pd)
 569{
 570    qemu_spin_unlock(&pd->lock);
 571    page_unlock__debug(pd);
 572}
 573
 574static inline struct page_entry *
 575page_entry_new(PageDesc *pd, tb_page_addr_t index)
 576{
 577    struct page_entry *pe = g_malloc(sizeof(*pe));
 578
 579    pe->index = index;
 580    pe->pd = pd;
 581    pe->locked = false;
 582    return pe;
 583}
 584
 585static void page_entry_destroy(gpointer p)
 586{
 587    struct page_entry *pe = p;
 588
 589    g_assert(pe->locked);
 590    page_unlock(pe->pd);
 591    g_free(pe);
 592}
 593
 594/* returns false on success */
 595static bool page_entry_trylock(struct page_entry *pe)
 596{
 597    bool busy;
 598
 599    busy = qemu_spin_trylock(&pe->pd->lock);
 600    if (!busy) {
 601        g_assert(!pe->locked);
 602        pe->locked = true;
 603        page_lock__debug(pe->pd);
 604    }
 605    return busy;
 606}
 607
 608static void do_page_entry_lock(struct page_entry *pe)
 609{
 610    page_lock(pe->pd);
 611    g_assert(!pe->locked);
 612    pe->locked = true;
 613}
 614
 615static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
 616{
 617    struct page_entry *pe = value;
 618
 619    do_page_entry_lock(pe);
 620    return FALSE;
 621}
 622
 623static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
 624{
 625    struct page_entry *pe = value;
 626
 627    if (pe->locked) {
 628        pe->locked = false;
 629        page_unlock(pe->pd);
 630    }
 631    return FALSE;
 632}
 633
 634/*
 635 * Trylock a page, and if successful, add the page to a collection.
 636 * Returns true ("busy") if the page could not be locked; false otherwise.
 637 */
 638static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
 639{
 640    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
 641    struct page_entry *pe;
 642    PageDesc *pd;
 643
 644    pe = g_tree_lookup(set->tree, &index);
 645    if (pe) {
 646        return false;
 647    }
 648
 649    pd = page_find(index);
 650    if (pd == NULL) {
 651        return false;
 652    }
 653
 654    pe = page_entry_new(pd, index);
 655    g_tree_insert(set->tree, &pe->index, pe);
 656
 657    /*
 658     * If this is either (1) the first insertion or (2) a page whose index
 659     * is higher than any other so far, just lock the page and move on.
 660     */
 661    if (set->max == NULL || pe->index > set->max->index) {
 662        set->max = pe;
 663        do_page_entry_lock(pe);
 664        return false;
 665    }
 666    /*
 667     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
 668     * locks in order.
 669     */
 670    return page_entry_trylock(pe);
 671}
 672
 673static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
 674{
 675    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
 676    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
 677
 678    if (a == b) {
 679        return 0;
 680    } else if (a < b) {
 681        return -1;
 682    }
 683    return 1;
 684}
 685
 686/*
 687 * Lock a range of pages ([@start,@end[) as well as the pages of all
 688 * intersecting TBs.
 689 * Locking order: acquire locks in ascending order of page index.
 690 */
 691struct page_collection *
 692page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
 693{
 694    struct page_collection *set = g_malloc(sizeof(*set));
 695    tb_page_addr_t index;
 696    PageDesc *pd;
 697
 698    start >>= TARGET_PAGE_BITS;
 699    end   >>= TARGET_PAGE_BITS;
 700    g_assert(start <= end);
 701
 702    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
 703                                page_entry_destroy);
 704    set->max = NULL;
 705    assert_no_pages_locked();
 706
 707 retry:
 708    g_tree_foreach(set->tree, page_entry_lock, NULL);
 709
 710    for (index = start; index <= end; index++) {
 711        TranslationBlock *tb;
 712        int n;
 713
 714        pd = page_find(index);
 715        if (pd == NULL) {
 716            continue;
 717        }
 718        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
 719            g_tree_foreach(set->tree, page_entry_unlock, NULL);
 720            goto retry;
 721        }
 722        assert_page_locked(pd);
 723        PAGE_FOR_EACH_TB(pd, tb, n) {
 724            if (page_trylock_add(set, tb_page_addr0(tb)) ||
 725                (tb_page_addr1(tb) != -1 &&
 726                 page_trylock_add(set, tb_page_addr1(tb)))) {
 727                /* drop all locks, and reacquire in order */
 728                g_tree_foreach(set->tree, page_entry_unlock, NULL);
 729                goto retry;
 730            }
 731        }
 732    }
 733    return set;
 734}
 735
 736void page_collection_unlock(struct page_collection *set)
 737{
 738    /* entries are unlocked and freed via page_entry_destroy */
 739    g_tree_destroy(set->tree);
 740    g_free(set);
 741}
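     /*
      * Usage sketch: a caller that needs to modify the TB lists of
      * several pages at once brackets the update with these two calls:
      *
      *     struct page_collection *pages = page_collection_lock(start, end);
      *     ... walk and modify the TBs of the pages in the range ...
      *     page_collection_unlock(pages);
      *
      * page_collection_lock() takes every page lock in ascending index
      * order; page_collection_unlock() drops them all and frees the set.
      */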
 742
 743#endif /* !CONFIG_USER_ONLY */
 744
 745/*
 746 * Isolate the portion of code gen which can setjmp/longjmp.
 747 * Return the size of the generated code, or negative on error.
 748 */
 749static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
 750                           target_ulong pc, void *host_pc,
 751                           int *max_insns, int64_t *ti)
 752{
 753    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
 754    if (unlikely(ret != 0)) {
 755        return ret;
 756    }
 757
 758    tcg_func_start(tcg_ctx);
 759
 760    tcg_ctx->cpu = env_cpu(env);
 761    gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
 762    assert(tb->size != 0);
 763    tcg_ctx->cpu = NULL;
 764    *max_insns = tb->icount;
 765
 766#ifdef CONFIG_PROFILER
 767    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
 768    qatomic_set(&tcg_ctx->prof.interm_time,
 769                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
 770    *ti = profile_getclock();
 771#endif
 772
 773    return tcg_gen_code(tcg_ctx, tb, pc);
 774}
 775
 776/* Called with mmap_lock held for user mode emulation.  */
 777TranslationBlock *tb_gen_code(CPUState *cpu,
 778                              target_ulong pc, target_ulong cs_base,
 779                              uint32_t flags, int cflags)
 780{
 781    CPUArchState *env = cpu->env_ptr;
 782    TranslationBlock *tb, *existing_tb;
 783    tb_page_addr_t phys_pc;
 784    tcg_insn_unit *gen_code_buf;
 785    int gen_code_size, search_size, max_insns;
 786#ifdef CONFIG_PROFILER
 787    TCGProfile *prof = &tcg_ctx->prof;
 788#endif
 789    int64_t ti;
 790    void *host_pc;
 791
 792    assert_memory_lock();
 793    qemu_thread_jit_write();
 794
 795    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
 796
 797    if (phys_pc == -1) {
 798        /* Generate a one-shot TB with 1 insn in it */
 799        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
 800    }
 801
 802    max_insns = cflags & CF_COUNT_MASK;
 803    if (max_insns == 0) {
 804        max_insns = TCG_MAX_INSNS;
 805    }
 806    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
 807
 808 buffer_overflow:
 809    tb = tcg_tb_alloc(tcg_ctx);
 810    if (unlikely(!tb)) {
 811        /* flush must be done */
 812        tb_flush(cpu);
 813        mmap_unlock();
 814        /* Make the execution loop process the flush as soon as possible.  */
 815        cpu->exception_index = EXCP_INTERRUPT;
 816        cpu_loop_exit(cpu);
 817    }
 818
 819    gen_code_buf = tcg_ctx->code_gen_ptr;
 820    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
 821#if !TARGET_TB_PCREL
 822    tb->pc = pc;
 823#endif
 824    tb->cs_base = cs_base;
 825    tb->flags = flags;
 826    tb->cflags = cflags;
 827    tb->trace_vcpu_dstate = *cpu->trace_dstate;
 828    tb_set_page_addr0(tb, phys_pc);
 829    tb_set_page_addr1(tb, -1);
 830    tcg_ctx->tb_cflags = cflags;
 831 tb_overflow:
 832
 833#ifdef CONFIG_PROFILER
 834    /* includes aborted translations because of exceptions */
 835    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
 836    ti = profile_getclock();
 837#endif
 838
 839    trace_translate_block(tb, pc, tb->tc.ptr);
 840
 841    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
 842    if (unlikely(gen_code_size < 0)) {
 843        switch (gen_code_size) {
 844        case -1:
 845            /*
 846             * Overflow of code_gen_buffer, or the current slice of it.
 847             *
 848             * TODO: We don't need to re-do gen_intermediate_code, nor
 849             * should we re-do the tcg optimization currently hidden
 850             * inside tcg_gen_code.  All that should be required is to
 851             * flush the TBs, allocate a new TB, re-initialize it per
 852             * above, and re-do the actual code generation.
 853             */
 854            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
 855                          "Restarting code generation for "
 856                          "code_gen_buffer overflow\n");
 857            goto buffer_overflow;
 858
 859        case -2:
 860            /*
 861             * The code generated for the TranslationBlock is too large.
 862             * The maximum size allowed by the unwind info is 64k.
 863             * There may be stricter constraints from relocations
 864             * in the tcg backend.
 865             *
 866             * Try again with half as many insns as we attempted this time.
 867             * If a single insn overflows, there's a bug somewhere...
 868             */
 869            assert(max_insns > 1);
 870            max_insns /= 2;
 871            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
 872                          "Restarting code generation with "
 873                          "smaller translation block (max %d insns)\n",
 874                          max_insns);
 875            goto tb_overflow;
 876
 877        default:
 878            g_assert_not_reached();
 879        }
 880    }
 881    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
 882    if (unlikely(search_size < 0)) {
 883        goto buffer_overflow;
 884    }
 885    tb->tc.size = gen_code_size;
 886
 887#ifdef CONFIG_PROFILER
 888    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
 889    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
 890    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
 891    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
 892#endif
 893
 894#ifdef DEBUG_DISAS
 895    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
 896        qemu_log_in_addr_range(pc)) {
 897        FILE *logfile = qemu_log_trylock();
 898        if (logfile) {
 899            int code_size, data_size;
 900            const tcg_target_ulong *rx_data_gen_ptr;
 901            size_t chunk_start;
 902            int insn = 0;
 903
 904            if (tcg_ctx->data_gen_ptr) {
 905                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
 906                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
 907                data_size = gen_code_size - code_size;
 908            } else {
 909                rx_data_gen_ptr = 0;
 910                code_size = gen_code_size;
 911                data_size = 0;
 912            }
 913
 914            /* Dump header and the first instruction */
 915            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
 916            fprintf(logfile,
 917                    "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
 918                    tcg_ctx->gen_insn_data[insn][0]);
 919            chunk_start = tcg_ctx->gen_insn_end_off[insn];
 920            disas(logfile, tb->tc.ptr, chunk_start);
 921
 922            /*
 923             * Dump each instruction chunk, wrapping up empty chunks into
 924             * the next instruction. The whole array is offset so the
 925             * first entry is the beginning of the 2nd instruction.
 926             */
 927            while (insn < tb->icount) {
 928                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
 929                if (chunk_end > chunk_start) {
 930                    fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
 931                            tcg_ctx->gen_insn_data[insn][0]);
 932                    disas(logfile, tb->tc.ptr + chunk_start,
 933                          chunk_end - chunk_start);
 934                    chunk_start = chunk_end;
 935                }
 936                insn++;
 937            }
 938
 939            if (chunk_start < code_size) {
 940                fprintf(logfile, "  -- tb slow paths + alignment\n");
 941                disas(logfile, tb->tc.ptr + chunk_start,
 942                      code_size - chunk_start);
 943            }
 944
 945            /* Finally dump any data we may have after the block */
 946            if (data_size) {
 947                int i;
 948                fprintf(logfile, "  data: [size=%d]\n", data_size);
 949                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
 950                    if (sizeof(tcg_target_ulong) == 8) {
 951                        fprintf(logfile,
 952                                "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
 953                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
 954                    } else if (sizeof(tcg_target_ulong) == 4) {
 955                        fprintf(logfile,
 956                                "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
 957                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
 958                    } else {
 959                        qemu_build_not_reached();
 960                    }
 961                }
 962            }
 963            fprintf(logfile, "\n");
 964            qemu_log_unlock(logfile);
 965        }
 966    }
 967#endif
 968
 969    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
 970        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
 971                 CODE_GEN_ALIGN));
 972
 973    /* init jump list */
 974    qemu_spin_init(&tb->jmp_lock);
 975    tb->jmp_list_head = (uintptr_t)NULL;
 976    tb->jmp_list_next[0] = (uintptr_t)NULL;
 977    tb->jmp_list_next[1] = (uintptr_t)NULL;
 978    tb->jmp_dest[0] = (uintptr_t)NULL;
 979    tb->jmp_dest[1] = (uintptr_t)NULL;
 980
 981    /* init original jump addresses which have been set during tcg_gen_code() */
 982    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
 983        tb_reset_jump(tb, 0);
 984    }
 985    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
 986        tb_reset_jump(tb, 1);
 987    }
 988
 989    /*
 990     * If the TB is not associated with a physical RAM page then it must be
 991     * a temporary one-insn TB, and we have nothing left to do. Return early
 992     * before attempting to link to other TBs or add to the lookup table.
 993     */
 994    if (tb_page_addr0(tb) == -1) {
 995        return tb;
 996    }
 997
 998    /*
 999     * Insert TB into the corresponding region tree before publishing it
 1000     * through QHT. Otherwise, an unwind that happens inside this TB might
 1001     * fail to look itself up using the host PC.
1002     */
1003    tcg_tb_insert(tb);
1004
1005    /*
1006     * No explicit memory barrier is required -- tb_link_page() makes the
1007     * TB visible in a consistent state.
1008     */
1009    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
1010    /* if the TB already exists, discard what we just translated */
1011    if (unlikely(existing_tb != tb)) {
1012        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1013
1014        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1015        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1016        tcg_tb_remove(tb);
1017        return existing_tb;
1018    }
1019    return tb;
1020}
1021
1022/* user-mode: call with mmap_lock held */
1023void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1024{
1025    TranslationBlock *tb;
1026
1027    assert_memory_lock();
1028
1029    tb = tcg_tb_lookup(retaddr);
1030    if (tb) {
1031        /* We can use retranslation to find the PC.  */
1032        cpu_restore_state_from_tb(cpu, tb, retaddr);
1033        tb_phys_invalidate(tb, -1);
1034    } else {
1035        /* The exception probably happened in a helper.  The CPU state should
1036           have been saved before calling it. Fetch the PC from there.  */
1037        CPUArchState *env = cpu->env_ptr;
1038        target_ulong pc, cs_base;
1039        tb_page_addr_t addr;
1040        uint32_t flags;
1041
1042        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1043        addr = get_page_addr_code(env, pc);
1044        if (addr != -1) {
1045            tb_invalidate_phys_range(addr, addr + 1);
1046        }
1047    }
1048}
1049
1050#ifndef CONFIG_USER_ONLY
1051/*
1052 * In deterministic execution mode, instructions doing device I/Os
1053 * must be at the end of the TB.
1054 *
1055 * Called by softmmu_template.h, with iothread mutex not held.
1056 */
1057void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1058{
1059    TranslationBlock *tb;
1060    CPUClass *cc;
1061    uint32_t n;
1062
1063    tb = tcg_tb_lookup(retaddr);
1064    if (!tb) {
1065        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1066                  (void *)retaddr);
1067    }
1068    cpu_restore_state_from_tb(cpu, tb, retaddr);
1069
1070    /*
1071     * Some guests must re-execute the branch when re-executing a delay
1072     * slot instruction.  When this is the case, adjust icount and N
1073     * to account for the re-execution of the branch.
1074     */
1075    n = 1;
1076    cc = CPU_GET_CLASS(cpu);
1077    if (cc->tcg_ops->io_recompile_replay_branch &&
1078        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1079        cpu_neg(cpu)->icount_decr.u16.low++;
1080        n = 2;
1081    }
1082
1083    /*
 1084     * Exit the loop and potentially generate a new TB executing
 1085     * just the I/O insns. We also limit instrumentation to memory
1086     * operations only (which execute after completion) so we don't
1087     * double instrument the instruction.
1088     */
1089    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1090
1091    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
1092        target_ulong pc = log_pc(cpu, tb);
1093        if (qemu_log_in_addr_range(pc)) {
1094            qemu_log("cpu_io_recompile: rewound execution of TB to "
1095                     TARGET_FMT_lx "\n", pc);
1096        }
1097    }
1098
1099    cpu_loop_exit_noexc(cpu);
1100}
1101
1102static void print_qht_statistics(struct qht_stats hst, GString *buf)
1103{
1104    uint32_t hgram_opts;
1105    size_t hgram_bins;
1106    char *hgram;
1107
1108    if (!hst.head_buckets) {
1109        return;
1110    }
1111    g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
1112                           "(%0.2f%% head buckets used)\n",
1113                           hst.used_head_buckets, hst.head_buckets,
1114                           (double)hst.used_head_buckets /
1115                           hst.head_buckets * 100);
1116
1117    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
1118    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
1119    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
1120        hgram_opts |= QDIST_PR_NODECIMAL;
1121    }
1122    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
1123    g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
1124                           "Histogram: %s\n",
1125                           qdist_avg(&hst.occupancy) * 100, hgram);
1126    g_free(hgram);
1127
1128    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
1129    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
1130    if (hgram_bins > 10) {
1131        hgram_bins = 10;
1132    } else {
1133        hgram_bins = 0;
1134        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
1135    }
1136    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
1137    g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
1138                           "Histogram: %s\n",
1139                           qdist_avg(&hst.chain), hgram);
1140    g_free(hgram);
1141}
1142
1143struct tb_tree_stats {
1144    size_t nb_tbs;
1145    size_t host_size;
1146    size_t target_size;
1147    size_t max_target_size;
1148    size_t direct_jmp_count;
1149    size_t direct_jmp2_count;
1150    size_t cross_page;
1151};
1152
1153static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
1154{
1155    const TranslationBlock *tb = value;
1156    struct tb_tree_stats *tst = data;
1157
1158    tst->nb_tbs++;
1159    tst->host_size += tb->tc.size;
1160    tst->target_size += tb->size;
1161    if (tb->size > tst->max_target_size) {
1162        tst->max_target_size = tb->size;
1163    }
1164    if (tb_page_addr1(tb) != -1) {
1165        tst->cross_page++;
1166    }
1167    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1168        tst->direct_jmp_count++;
1169        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1170            tst->direct_jmp2_count++;
1171        }
1172    }
1173    return false;
1174}
1175
1176void dump_exec_info(GString *buf)
1177{
1178    struct tb_tree_stats tst = {};
1179    struct qht_stats hst;
1180    size_t nb_tbs, flush_full, flush_part, flush_elide;
1181
1182    tcg_tb_foreach(tb_tree_stats_iter, &tst);
1183    nb_tbs = tst.nb_tbs;
1184    /* XXX: avoid using doubles ? */
1185    g_string_append_printf(buf, "Translation buffer state:\n");
1186    /*
1187     * Report total code size including the padding and TB structs;
1188     * otherwise users might think "-accel tcg,tb-size" is not honoured.
1189     * For avg host size we use the precise numbers from tb_tree_stats though.
1190     */
1191    g_string_append_printf(buf, "gen code size       %zu/%zu\n",
1192                           tcg_code_size(), tcg_code_capacity());
1193    g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
1194    g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
1195                           nb_tbs ? tst.target_size / nb_tbs : 0,
1196                           tst.max_target_size);
1197    g_string_append_printf(buf, "TB avg host size    %zu bytes "
1198                           "(expansion ratio: %0.1f)\n",
1199                           nb_tbs ? tst.host_size / nb_tbs : 0,
1200                           tst.target_size ?
1201                           (double)tst.host_size / tst.target_size : 0);
1202    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
1203                           tst.cross_page,
1204                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
1205    g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
1206                           "(2 jumps=%zu %zu%%)\n",
1207                           tst.direct_jmp_count,
1208                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
1209                           tst.direct_jmp2_count,
1210                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
1211
1212    qht_statistics_init(&tb_ctx.htable, &hst);
1213    print_qht_statistics(hst, buf);
1214    qht_statistics_destroy(&hst);
1215
1216    g_string_append_printf(buf, "\nStatistics:\n");
1217    g_string_append_printf(buf, "TB flush count      %u\n",
1218                           qatomic_read(&tb_ctx.tb_flush_count));
1219    g_string_append_printf(buf, "TB invalidate count %u\n",
1220                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));
1221
1222    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
1223    g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
1224    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
1225    g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
1226    tcg_dump_info(buf);
1227}
1228
1229#else /* CONFIG_USER_ONLY */
1230
1231void cpu_interrupt(CPUState *cpu, int mask)
1232{
1233    g_assert(qemu_mutex_iothread_locked());
1234    cpu->interrupt_request |= mask;
1235    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
1236}
1237
1238/*
1239 * Walks guest process memory "regions" one by one
1240 * and calls callback function 'fn' for each region.
1241 */
1242struct walk_memory_regions_data {
1243    walk_memory_regions_fn fn;
1244    void *priv;
1245    target_ulong start;
1246    int prot;
1247};
1248
1249static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1250                                   target_ulong end, int new_prot)
1251{
1252    if (data->start != -1u) {
1253        int rc = data->fn(data->priv, data->start, end, data->prot);
1254        if (rc != 0) {
1255            return rc;
1256        }
1257    }
1258
1259    data->start = (new_prot ? end : -1u);
1260    data->prot = new_prot;
1261
1262    return 0;
1263}
1264
1265static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1266                                 target_ulong base, int level, void **lp)
1267{
1268    target_ulong pa;
1269    int i, rc;
1270
1271    if (*lp == NULL) {
1272        return walk_memory_regions_end(data, base, 0);
1273    }
1274
1275    if (level == 0) {
1276        PageDesc *pd = *lp;
1277
1278        for (i = 0; i < V_L2_SIZE; ++i) {
1279            int prot = pd[i].flags;
1280
1281            pa = base | (i << TARGET_PAGE_BITS);
1282            if (prot != data->prot) {
1283                rc = walk_memory_regions_end(data, pa, prot);
1284                if (rc != 0) {
1285                    return rc;
1286                }
1287            }
1288        }
1289    } else {
1290        void **pp = *lp;
1291
1292        for (i = 0; i < V_L2_SIZE; ++i) {
1293            pa = base | ((target_ulong)i <<
1294                (TARGET_PAGE_BITS + V_L2_BITS * level));
1295            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1296            if (rc != 0) {
1297                return rc;
1298            }
1299        }
1300    }
1301
1302    return 0;
1303}
1304
1305int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1306{
1307    struct walk_memory_regions_data data;
1308    uintptr_t i, l1_sz = v_l1_size;
1309
1310    data.fn = fn;
1311    data.priv = priv;
1312    data.start = -1u;
1313    data.prot = 0;
1314
1315    for (i = 0; i < l1_sz; i++) {
1316        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
1317        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
1318        if (rc != 0) {
1319            return rc;
1320        }
1321    }
1322
1323    return walk_memory_regions_end(&data, 0, 0);
1324}
1325
1326static int dump_region(void *priv, target_ulong start,
1327    target_ulong end, unsigned long prot)
1328{
1329    FILE *f = (FILE *)priv;
1330
1331    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
1332        " "TARGET_FMT_lx" %c%c%c\n",
1333        start, end, end - start,
1334        ((prot & PAGE_READ) ? 'r' : '-'),
1335        ((prot & PAGE_WRITE) ? 'w' : '-'),
1336        ((prot & PAGE_EXEC) ? 'x' : '-'));
1337
1338    return 0;
1339}
1340
1341/* dump memory mappings */
1342void page_dump(FILE *f)
1343{
1344    const int length = sizeof(target_ulong) * 2;
1345    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
1346            length, "start", length, "end", length, "size", "prot");
1347    walk_memory_regions(f, dump_region);
1348}
1349
1350int page_get_flags(target_ulong address)
1351{
1352    PageDesc *p;
1353
1354    p = page_find(address >> TARGET_PAGE_BITS);
1355    if (!p) {
1356        return 0;
1357    }
1358    return p->flags;
1359}
1360
1361/*
1362 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
1363 * By default, they are not kept.
1364 */
1365#ifndef PAGE_TARGET_STICKY
1366#define PAGE_TARGET_STICKY  0
1367#endif
1368#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
1369
1370/* Modify the flags of a page and invalidate the code if necessary.
 1371   The flag PAGE_WRITE_ORG is set automatically depending
1372   on PAGE_WRITE.  The mmap_lock should already be held.  */
1373void page_set_flags(target_ulong start, target_ulong end, int flags)
1374{
1375    target_ulong addr, len;
1376    bool reset, inval_tb = false;
1377
1378    /* This function should never be called with addresses outside the
1379       guest address space.  If this assert fires, it probably indicates
1380       a missing call to h2g_valid.  */
1381    assert(end - 1 <= GUEST_ADDR_MAX);
1382    assert(start < end);
1383    /* Only set PAGE_ANON with new mappings. */
1384    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
1385    assert_memory_lock();
1386
1387    start = start & TARGET_PAGE_MASK;
1388    end = TARGET_PAGE_ALIGN(end);
1389
1390    if (flags & PAGE_WRITE) {
1391        flags |= PAGE_WRITE_ORG;
1392    }
1393    reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
1394    if (reset) {
1395        page_reset_target_data(start, end);
1396    }
1397    flags &= ~PAGE_RESET;
1398
1399    for (addr = start, len = end - start;
1400         len != 0;
1401         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1402        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
1403
1404        /*
1405         * If the page was executable, but is reset, or is no longer
1406         * executable, or has become writable, then invalidate any code.
1407         */
1408        if ((p->flags & PAGE_EXEC)
1409            && (reset ||
1410                !(flags & PAGE_EXEC) ||
1411                (flags & ~p->flags & PAGE_WRITE))) {
1412            inval_tb = true;
1413        }
1414        /* Using mprotect on a page does not change sticky bits. */
1415        p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
1416    }
1417
1418    if (inval_tb) {
1419        tb_invalidate_phys_range(start, end);
1420    }
1421}
1422
1423int page_check_range(target_ulong start, target_ulong len, int flags)
1424{
1425    PageDesc *p;
1426    target_ulong end;
1427    target_ulong addr;
1428
1429    /* This function should never be called with addresses outside the
1430       guest address space.  If this assert fires, it probably indicates
1431       a missing call to h2g_valid.  */
1432    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
1433        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
1434    }
1435
1436    if (len == 0) {
1437        return 0;
1438    }
1439    if (start + len - 1 < start) {
1440        /* We've wrapped around.  */
1441        return -1;
1442    }
1443
 1444    /* must do this before we lose bits in the next step */
1445    end = TARGET_PAGE_ALIGN(start + len);
1446    start = start & TARGET_PAGE_MASK;
1447
1448    for (addr = start, len = end - start;
1449         len != 0;
1450         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1451        p = page_find(addr >> TARGET_PAGE_BITS);
1452        if (!p) {
1453            return -1;
1454        }
1455        if (!(p->flags & PAGE_VALID)) {
1456            return -1;
1457        }
1458
1459        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
1460            return -1;
1461        }
1462        if (flags & PAGE_WRITE) {
1463            if (!(p->flags & PAGE_WRITE_ORG)) {
1464                return -1;
1465            }
1466            /* unprotect the page if it was put read-only because it
1467               contains translated code */
1468            if (!(p->flags & PAGE_WRITE)) {
1469                if (!page_unprotect(addr, 0)) {
1470                    return -1;
1471                }
1472            }
1473        }
1474    }
1475    return 0;
1476}
1477
1478void page_protect(tb_page_addr_t page_addr)
1479{
1480    target_ulong addr;
1481    PageDesc *p;
1482    int prot;
1483
1484    p = page_find(page_addr >> TARGET_PAGE_BITS);
1485    if (p && (p->flags & PAGE_WRITE)) {
1486        /*
 1487         * Force the host page to be non-writable (writes will have a page fault +
1488         * mprotect overhead).
1489         */
1490        page_addr &= qemu_host_page_mask;
1491        prot = 0;
1492        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1493             addr += TARGET_PAGE_SIZE) {
1494
1495            p = page_find(addr >> TARGET_PAGE_BITS);
1496            if (!p) {
1497                continue;
1498            }
1499            prot |= p->flags;
1500            p->flags &= ~PAGE_WRITE;
1501        }
1502        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1503                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1504    }
1505}
1506
1507/* called from signal handler: invalidate the code and unprotect the
1508 * page. Return 0 if the fault was not handled, 1 if it was handled,
1509 * and 2 if it was handled but the caller must cause the TB to be
1510 * immediately exited. (We can only return 2 if the 'pc' argument is
1511 * non-zero.)
1512 */
1513int page_unprotect(target_ulong address, uintptr_t pc)
1514{
1515    unsigned int prot;
1516    bool current_tb_invalidated;
1517    PageDesc *p;
1518    target_ulong host_start, host_end, addr;
1519
1520    /* Technically this isn't safe inside a signal handler.  However we
1521       know this only ever happens in a synchronous SEGV handler, so in
1522       practice it seems to be ok.  */
1523    mmap_lock();
1524
1525    p = page_find(address >> TARGET_PAGE_BITS);
1526    if (!p) {
1527        mmap_unlock();
1528        return 0;
1529    }
1530
1531    /* if the page was really writable, then we change its
1532       protection back to writable */
1533    if (p->flags & PAGE_WRITE_ORG) {
1534        current_tb_invalidated = false;
1535        if (p->flags & PAGE_WRITE) {
1536            /* If the page is actually marked WRITE then assume this is because
1537             * this thread raced with another one which got here first and
1538             * set the page to PAGE_WRITE and did the TB invalidate for us.
1539             */
1540#ifdef TARGET_HAS_PRECISE_SMC
1541            TranslationBlock *current_tb = tcg_tb_lookup(pc);
1542            if (current_tb) {
1543                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
1544            }
1545#endif
1546        } else {
1547            host_start = address & qemu_host_page_mask;
1548            host_end = host_start + qemu_host_page_size;
1549
1550            prot = 0;
1551            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
1552                p = page_find(addr >> TARGET_PAGE_BITS);
1553                p->flags |= PAGE_WRITE;
1554                prot |= p->flags;
1555
1556                /* and since the content will be modified, we must invalidate
1557                   the corresponding translated code. */
1558                current_tb_invalidated |=
1559                    tb_invalidate_phys_page_unwind(addr, pc);
1560            }
1561            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
1562                     prot & PAGE_BITS);
1563        }
1564        mmap_unlock();
1565        /* If current TB was invalidated return to main loop */
1566        return current_tb_invalidated ? 2 : 1;
1567    }
1568    mmap_unlock();
1569    return 0;
1570}
1571#endif /* CONFIG_USER_ONLY */
1572
1573/*
 1574 * Called by generic code, e.g. at cpu reset after cpu creation,
1575 * therefore we must be prepared to allocate the jump cache.
1576 */
1577void tcg_flush_jmp_cache(CPUState *cpu)
1578{
1579    CPUJumpCache *jc = cpu->tb_jmp_cache;
1580
1581    /* During early initialization, the cache may not yet be allocated. */
1582    if (unlikely(jc == NULL)) {
1583        return;
1584    }
1585
1586    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
1587        qatomic_set(&jc->array[i].tb, NULL);
1588    }
1589}
1590
 1591/* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
1592void tcg_flush_softmmu_tlb(CPUState *cs)
1593{
1594#ifdef CONFIG_SOFTMMU
1595    tlb_flush(cs);
1596#endif
1597}
1598