qemu/exec.c
   1/*
   2 *  virtual page mapping and translated block handling
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifdef _WIN32
  21#include <windows.h>
  22#else
  23#include <sys/types.h>
  24#include <sys/mman.h>
  25#endif
  26
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "tcg.h"
  30#include "hw/hw.h"
  31#include "hw/qdev.h"
  32#include "osdep.h"
  33#include "kvm.h"
  34#include "hw/xen.h"
  35#include "qemu-timer.h"
  36#include "memory.h"
  37#include "exec-memory.h"
  38#if defined(CONFIG_USER_ONLY)
  39#include <qemu.h>
  40#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  41#include <sys/param.h>
  42#if __FreeBSD_version >= 700104
  43#define HAVE_KINFO_GETVMMAP
  44#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  45#include <sys/time.h>
  46#include <sys/proc.h>
  47#include <machine/profile.h>
  48#define _KERNEL
  49#include <sys/user.h>
  50#undef _KERNEL
  51#undef sigqueue
  52#include <libutil.h>
  53#endif
  54#endif
  55#else /* !CONFIG_USER_ONLY */
  56#include "xen-mapcache.h"
  57#include "trace.h"
  58#endif
  59
  60#include "cputlb.h"
  61
  62#define WANT_EXEC_OBSOLETE
  63#include "exec-obsolete.h"
  64
  65//#define DEBUG_TB_INVALIDATE
  66//#define DEBUG_FLUSH
  67//#define DEBUG_UNASSIGNED
  68
  69/* make various TB consistency checks */
  70//#define DEBUG_TB_CHECK
  71
  72//#define DEBUG_IOPORT
  73//#define DEBUG_SUBPAGE
  74
  75#if !defined(CONFIG_USER_ONLY)
  76/* TB consistency checks only implemented for usermode emulation.  */
  77#undef DEBUG_TB_CHECK
  78#endif
  79
  80#define SMC_BITMAP_USE_THRESHOLD 10
  81
  82static TranslationBlock *tbs;
  83static int code_gen_max_blocks;
  84TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
  85static int nb_tbs;
  86/* any access to the tbs or the page table must use this lock */
  87spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
  88
  89#if defined(__arm__) || defined(__sparc_v9__)
   90/* The prologue must be reachable with a direct jump. ARM and Sparc64
   91 have limited branch ranges (possibly also PPC), so place it in a
   92 section close to the code segment. */
  93#define code_gen_section                                \
  94    __attribute__((__section__(".gen_code")))           \
  95    __attribute__((aligned (32)))
  96#elif defined(_WIN32) && !defined(_WIN64)
  97#define code_gen_section                                \
  98    __attribute__((aligned (16)))
  99#else
 100#define code_gen_section                                \
 101    __attribute__((aligned (32)))
 102#endif
 103
 104uint8_t code_gen_prologue[1024] code_gen_section;
 105static uint8_t *code_gen_buffer;
 106static unsigned long code_gen_buffer_size;
 107/* threshold to flush the translated code buffer */
 108static unsigned long code_gen_buffer_max_size;
 109static uint8_t *code_gen_ptr;
 110
 111#if !defined(CONFIG_USER_ONLY)
 112int phys_ram_fd;
 113static int in_migration;
 114
 115RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
 116
 117static MemoryRegion *system_memory;
 118static MemoryRegion *system_io;
 119
 120MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
 121static MemoryRegion io_mem_subpage_ram;
 122
 123#endif
 124
 125CPUArchState *first_cpu;
 126/* current CPU in the current thread. It is only valid inside
 127   cpu_exec() */
 128DEFINE_TLS(CPUArchState *,cpu_single_env);
 129/* 0 = Do not count executed instructions.
 130   1 = Precise instruction counting.
 131   2 = Adaptive rate instruction counting.  */
 132int use_icount = 0;
 133
 134typedef struct PageDesc {
 135    /* list of TBs intersecting this ram page */
 136    TranslationBlock *first_tb;
  137    /* to optimize self-modifying code handling, we count code-write
  138       lookups to a given page and switch to a bitmap past a threshold */
 139    unsigned int code_write_count;
 140    uint8_t *code_bitmap;
 141#if defined(CONFIG_USER_ONLY)
 142    unsigned long flags;
 143#endif
 144} PageDesc;
 145
 146/* In system mode we want L1_MAP to be based on ram offsets,
 147   while in user mode we want it to be based on virtual addresses.  */
 148#if !defined(CONFIG_USER_ONLY)
 149#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 150# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 151#else
 152# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 153#endif
 154#else
 155# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
 156#endif
 157
 158/* Size of the L2 (and L3, etc) page tables.  */
 159#define L2_BITS 10
 160#define L2_SIZE (1 << L2_BITS)
 161
 162#define P_L2_LEVELS \
 163    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
 164
 165/* The bits remaining after N lower levels of page tables.  */
 166#define V_L1_BITS_REM \
 167    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
 168
 169#if V_L1_BITS_REM < 4
 170#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
 171#else
 172#define V_L1_BITS  V_L1_BITS_REM
 173#endif
 174
 175#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
 176
 177#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
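/* Example: with L1_MAP_ADDR_SPACE_BITS = 64, TARGET_PAGE_BITS = 12 and
   L2_BITS = 10 there are 52 bits of page index; V_L1_BITS_REM = 52 % 10 = 2,
   which is below 4, so the top level absorbs it: V_L1_BITS = 12,
   V_L1_SIZE = 4096, V_L1_SHIFT = 40, and four 1024-entry levels sit below
   the statically allocated l1_map[]. */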
 178
 179uintptr_t qemu_real_host_page_size;
 180uintptr_t qemu_host_page_size;
 181uintptr_t qemu_host_page_mask;
 182
 183/* This is a multi-level map on the virtual address space.
 184   The bottom level has pointers to PageDesc.  */
 185static void *l1_map[V_L1_SIZE];
 186
 187#if !defined(CONFIG_USER_ONLY)
 188typedef struct PhysPageEntry PhysPageEntry;
 189
 190static MemoryRegionSection *phys_sections;
 191static unsigned phys_sections_nb, phys_sections_nb_alloc;
 192static uint16_t phys_section_unassigned;
 193static uint16_t phys_section_notdirty;
 194static uint16_t phys_section_rom;
 195static uint16_t phys_section_watch;
 196
 197struct PhysPageEntry {
 198    uint16_t is_leaf : 1;
 199     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
 200    uint16_t ptr : 15;
 201};
 202
 203/* Simple allocator for PhysPageEntry nodes */
 204static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 205static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
 206
 207#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
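/* A PhysPageEntry packs a leaf flag and a 15-bit index into 16 bits: leaf
   entries index phys_sections[], interior entries index phys_map_nodes[].
   PHYS_MAP_NODE_NIL is 0x7fff, the largest 15-bit value, and marks an entry
   whose child node has not been allocated yet. */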
 208
 209/* This is a multi-level map on the physical address space.
 210   The bottom level has pointers to MemoryRegionSections.  */
 211static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
 212
 213static void io_mem_init(void);
 214static void memory_map_init(void);
 215
 216static MemoryRegion io_mem_watch;
 217#endif
 218
 219/* statistics */
 220static int tb_flush_count;
 221static int tb_phys_invalidate_count;
 222
 223#ifdef _WIN32
 224static void map_exec(void *addr, long size)
 225{
 226    DWORD old_protect;
 227    VirtualProtect(addr, size,
 228                   PAGE_EXECUTE_READWRITE, &old_protect);
 229    
 230}
 231#else
 232static void map_exec(void *addr, long size)
 233{
 234    unsigned long start, end, page_size;
 235    
 236    page_size = getpagesize();
 237    start = (unsigned long)addr;
 238    start &= ~(page_size - 1);
 239    
 240    end = (unsigned long)addr + size;
 241    end += page_size - 1;
 242    end &= ~(page_size - 1);
 243    
 244    mprotect((void *)start, end - start,
 245             PROT_READ | PROT_WRITE | PROT_EXEC);
 246}
 247#endif
 248
 249static void page_init(void)
 250{
  251    /* NOTE: we can always assume that qemu_host_page_size >=
  252       TARGET_PAGE_SIZE */
 253#ifdef _WIN32
 254    {
 255        SYSTEM_INFO system_info;
 256
 257        GetSystemInfo(&system_info);
 258        qemu_real_host_page_size = system_info.dwPageSize;
 259    }
 260#else
 261    qemu_real_host_page_size = getpagesize();
 262#endif
 263    if (qemu_host_page_size == 0)
 264        qemu_host_page_size = qemu_real_host_page_size;
 265    if (qemu_host_page_size < TARGET_PAGE_SIZE)
 266        qemu_host_page_size = TARGET_PAGE_SIZE;
 267    qemu_host_page_mask = ~(qemu_host_page_size - 1);
 268
 269#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 270    {
 271#ifdef HAVE_KINFO_GETVMMAP
 272        struct kinfo_vmentry *freep;
 273        int i, cnt;
 274
 275        freep = kinfo_getvmmap(getpid(), &cnt);
 276        if (freep) {
 277            mmap_lock();
 278            for (i = 0; i < cnt; i++) {
 279                unsigned long startaddr, endaddr;
 280
 281                startaddr = freep[i].kve_start;
 282                endaddr = freep[i].kve_end;
 283                if (h2g_valid(startaddr)) {
 284                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 285
 286                    if (h2g_valid(endaddr)) {
 287                        endaddr = h2g(endaddr);
 288                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 289                    } else {
 290#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 291                        endaddr = ~0ul;
 292                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 293#endif
 294                    }
 295                }
 296            }
 297            free(freep);
 298            mmap_unlock();
 299        }
 300#else
 301        FILE *f;
 302
 303        last_brk = (unsigned long)sbrk(0);
 304
 305        f = fopen("/compat/linux/proc/self/maps", "r");
 306        if (f) {
 307            mmap_lock();
 308
 309            do {
 310                unsigned long startaddr, endaddr;
 311                int n;
 312
 313                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 314
 315                if (n == 2 && h2g_valid(startaddr)) {
 316                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 317
 318                    if (h2g_valid(endaddr)) {
 319                        endaddr = h2g(endaddr);
 320                    } else {
 321                        endaddr = ~0ul;
 322                    }
 323                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 324                }
 325            } while (!feof(f));
 326
 327            fclose(f);
 328            mmap_unlock();
 329        }
 330#endif
 331    }
 332#endif
 333}
 334
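/* Walk the multi-level l1_map for page 'index' and return its PageDesc.
   With 'alloc' set, missing intermediate tables and the final PageDesc array
   are created on demand; otherwise NULL is returned on a miss. */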
 335static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 336{
 337    PageDesc *pd;
 338    void **lp;
 339    int i;
 340
 341#if defined(CONFIG_USER_ONLY)
 342    /* We can't use g_malloc because it may recurse into a locked mutex. */
 343# define ALLOC(P, SIZE)                                 \
 344    do {                                                \
 345        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
 346                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
 347    } while (0)
 348#else
 349# define ALLOC(P, SIZE) \
 350    do { P = g_malloc0(SIZE); } while (0)
 351#endif
 352
 353    /* Level 1.  Always allocated.  */
 354    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
 355
 356    /* Level 2..N-1.  */
 357    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
 358        void **p = *lp;
 359
 360        if (p == NULL) {
 361            if (!alloc) {
 362                return NULL;
 363            }
 364            ALLOC(p, sizeof(void *) * L2_SIZE);
 365            *lp = p;
 366        }
 367
 368        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
 369    }
 370
 371    pd = *lp;
 372    if (pd == NULL) {
 373        if (!alloc) {
 374            return NULL;
 375        }
 376        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
 377        *lp = pd;
 378    }
 379
 380#undef ALLOC
 381
 382    return pd + (index & (L2_SIZE - 1));
 383}
 384
 385static inline PageDesc *page_find(tb_page_addr_t index)
 386{
 387    return page_find_alloc(index, 0);
 388}
 389
 390#if !defined(CONFIG_USER_ONLY)
 391
 392static void phys_map_node_reserve(unsigned nodes)
 393{
 394    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
 395        typedef PhysPageEntry Node[L2_SIZE];
 396        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
 397        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
 398                                      phys_map_nodes_nb + nodes);
 399        phys_map_nodes = g_renew(Node, phys_map_nodes,
 400                                 phys_map_nodes_nb_alloc);
 401    }
 402}
 403
 404static uint16_t phys_map_node_alloc(void)
 405{
 406    unsigned i;
 407    uint16_t ret;
 408
 409    ret = phys_map_nodes_nb++;
 410    assert(ret != PHYS_MAP_NODE_NIL);
 411    assert(ret != phys_map_nodes_nb_alloc);
 412    for (i = 0; i < L2_SIZE; ++i) {
 413        phys_map_nodes[ret][i].is_leaf = 0;
 414        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 415    }
 416    return ret;
 417}
 418
 419static void phys_map_nodes_reset(void)
 420{
 421    phys_map_nodes_nb = 0;
 422}
 423
 424
 425static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
 426                                target_phys_addr_t *nb, uint16_t leaf,
 427                                int level)
 428{
 429    PhysPageEntry *p;
 430    int i;
 431    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
 432
 433    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
 434        lp->ptr = phys_map_node_alloc();
 435        p = phys_map_nodes[lp->ptr];
 436        if (level == 0) {
 437            for (i = 0; i < L2_SIZE; i++) {
 438                p[i].is_leaf = 1;
 439                p[i].ptr = phys_section_unassigned;
 440            }
 441        }
 442    } else {
 443        p = phys_map_nodes[lp->ptr];
 444    }
 445    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 446
 447    while (*nb && lp < &p[L2_SIZE]) {
 448        if ((*index & (step - 1)) == 0 && *nb >= step) {
 449            lp->is_leaf = true;
 450            lp->ptr = leaf;
 451            *index += step;
 452            *nb -= step;
 453        } else {
 454            phys_page_set_level(lp, index, nb, leaf, level - 1);
 455        }
 456        ++lp;
 457    }
 458}
 459
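/* Map 'nb' pages starting at page 'index' to the section 'leaf'.  Naturally
   aligned runs that cover a whole subtree are stored as a single leaf entry
   at the corresponding level instead of being expanded page by page. */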
 460static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
 461                          uint16_t leaf)
 462{
 463    /* Wildly overreserve - it doesn't matter much. */
 464    phys_map_node_reserve(3 * P_L2_LEVELS);
 465
 466    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 467}
 468
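/* Look up the MemoryRegionSection covering page 'index'.  Unmapped pages
   resolve to the preallocated 'unassigned' section rather than to NULL. */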
 469MemoryRegionSection *phys_page_find(target_phys_addr_t index)
 470{
 471    PhysPageEntry lp = phys_map;
 472    PhysPageEntry *p;
 473    int i;
 474    uint16_t s_index = phys_section_unassigned;
 475
 476    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
 477        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 478            goto not_found;
 479        }
 480        p = phys_map_nodes[lp.ptr];
 481        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
 482    }
 483
 484    s_index = lp.ptr;
 485not_found:
 486    return &phys_sections[s_index];
 487}
 488
 489bool memory_region_is_unassigned(MemoryRegion *mr)
 490{
 491    return mr != &io_mem_ram && mr != &io_mem_rom
 492        && mr != &io_mem_notdirty && !mr->rom_device
 493        && mr != &io_mem_watch;
 494}
 495
 496#define mmap_lock() do { } while(0)
 497#define mmap_unlock() do { } while(0)
 498#endif
 499
 500#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
 501
 502#if defined(CONFIG_USER_ONLY)
  503/* Currently it is not recommended to allocate big chunks of data in
  504   user mode. This will change once a dedicated libc is used. */
 505#define USE_STATIC_CODE_GEN_BUFFER
 506#endif
 507
 508#ifdef USE_STATIC_CODE_GEN_BUFFER
 509static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 510               __attribute__((aligned (CODE_GEN_ALIGN)));
 511#endif
 512
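/* Allocate the buffer that will hold generated host code.  User-mode builds
   use the static buffer above; otherwise an executable mapping is created,
   with per-host placement and size limits so that generated code stays
   within direct-branch range of the prologue and of other blocks. */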
 513static void code_gen_alloc(unsigned long tb_size)
 514{
 515#ifdef USE_STATIC_CODE_GEN_BUFFER
 516    code_gen_buffer = static_code_gen_buffer;
 517    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 518    map_exec(code_gen_buffer, code_gen_buffer_size);
 519#else
 520    code_gen_buffer_size = tb_size;
 521    if (code_gen_buffer_size == 0) {
 522#if defined(CONFIG_USER_ONLY)
 523        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 524#else
 525        /* XXX: needs adjustments */
 526        code_gen_buffer_size = (unsigned long)(ram_size / 4);
 527#endif
 528    }
 529    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
 530        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
 531    /* The code gen buffer location may have constraints depending on
 532       the host cpu and OS */
 533#if defined(__linux__) 
 534    {
 535        int flags;
 536        void *start = NULL;
 537
 538        flags = MAP_PRIVATE | MAP_ANONYMOUS;
 539#if defined(__x86_64__)
 540        flags |= MAP_32BIT;
 541        /* Cannot map more than that */
 542        if (code_gen_buffer_size > (800 * 1024 * 1024))
 543            code_gen_buffer_size = (800 * 1024 * 1024);
 544#elif defined(__sparc_v9__)
 545        // Map the buffer below 2G, so we can use direct calls and branches
 546        flags |= MAP_FIXED;
 547        start = (void *) 0x60000000UL;
 548        if (code_gen_buffer_size > (512 * 1024 * 1024))
 549            code_gen_buffer_size = (512 * 1024 * 1024);
 550#elif defined(__arm__)
 551        /* Keep the buffer no bigger than 16MB to branch between blocks */
 552        if (code_gen_buffer_size > 16 * 1024 * 1024)
 553            code_gen_buffer_size = 16 * 1024 * 1024;
 554#elif defined(__s390x__)
 555        /* Map the buffer so that we can use direct calls and branches.  */
 556        /* We have a +- 4GB range on the branches; leave some slop.  */
 557        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
 558            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
 559        }
 560        start = (void *)0x90000000UL;
 561#endif
 562        code_gen_buffer = mmap(start, code_gen_buffer_size,
 563                               PROT_WRITE | PROT_READ | PROT_EXEC,
 564                               flags, -1, 0);
 565        if (code_gen_buffer == MAP_FAILED) {
 566            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
 567            exit(1);
 568        }
 569    }
 570#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
 571    || defined(__DragonFly__) || defined(__OpenBSD__) \
 572    || defined(__NetBSD__)
 573    {
 574        int flags;
 575        void *addr = NULL;
 576        flags = MAP_PRIVATE | MAP_ANONYMOUS;
 577#if defined(__x86_64__)
 578        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
 579         * 0x40000000 is free */
 580        flags |= MAP_FIXED;
 581        addr = (void *)0x40000000;
 582        /* Cannot map more than that */
 583        if (code_gen_buffer_size > (800 * 1024 * 1024))
 584            code_gen_buffer_size = (800 * 1024 * 1024);
 585#elif defined(__sparc_v9__)
 586        // Map the buffer below 2G, so we can use direct calls and branches
 587        flags |= MAP_FIXED;
 588        addr = (void *) 0x60000000UL;
 589        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
 590            code_gen_buffer_size = (512 * 1024 * 1024);
 591        }
 592#endif
 593        code_gen_buffer = mmap(addr, code_gen_buffer_size,
 594                               PROT_WRITE | PROT_READ | PROT_EXEC, 
 595                               flags, -1, 0);
 596        if (code_gen_buffer == MAP_FAILED) {
 597            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
 598            exit(1);
 599        }
 600    }
 601#else
 602    code_gen_buffer = g_malloc(code_gen_buffer_size);
 603    map_exec(code_gen_buffer, code_gen_buffer_size);
 604#endif
 605#endif /* !USE_STATIC_CODE_GEN_BUFFER */
 606    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
 607    code_gen_buffer_max_size = code_gen_buffer_size -
 608        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
 609    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
 610    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
 611}
 612
 613/* Must be called before using the QEMU cpus. 'tb_size' is the size
 614   (in bytes) allocated to the translation buffer. Zero means default
 615   size. */
 616void tcg_exec_init(unsigned long tb_size)
 617{
 618    cpu_gen_init();
 619    code_gen_alloc(tb_size);
 620    code_gen_ptr = code_gen_buffer;
 621    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
 622    page_init();
 623#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
 624    /* There's no guest base to take into account, so go ahead and
 625       initialize the prologue now.  */
 626    tcg_prologue_init(&tcg_ctx);
 627#endif
 628}
 629
 630bool tcg_enabled(void)
 631{
 632    return code_gen_buffer != NULL;
 633}
 634
 635void cpu_exec_init_all(void)
 636{
 637#if !defined(CONFIG_USER_ONLY)
 638    memory_map_init();
 639    io_mem_init();
 640#endif
 641}
 642
 643#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 644
 645static int cpu_common_post_load(void *opaque, int version_id)
 646{
 647    CPUArchState *env = opaque;
 648
 649    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 650       version_id is increased. */
 651    env->interrupt_request &= ~0x01;
 652    tlb_flush(env, 1);
 653
 654    return 0;
 655}
 656
 657static const VMStateDescription vmstate_cpu_common = {
 658    .name = "cpu_common",
 659    .version_id = 1,
 660    .minimum_version_id = 1,
 661    .minimum_version_id_old = 1,
 662    .post_load = cpu_common_post_load,
 663    .fields      = (VMStateField []) {
 664        VMSTATE_UINT32(halted, CPUArchState),
 665        VMSTATE_UINT32(interrupt_request, CPUArchState),
 666        VMSTATE_END_OF_LIST()
 667    }
 668};
 669#endif
 670
 671CPUArchState *qemu_get_cpu(int cpu)
 672{
 673    CPUArchState *env = first_cpu;
 674
 675    while (env) {
 676        if (env->cpu_index == cpu)
 677            break;
 678        env = env->next_cpu;
 679    }
 680
 681    return env;
 682}
 683
 684void cpu_exec_init(CPUArchState *env)
 685{
 686    CPUArchState **penv;
 687    int cpu_index;
 688
 689#if defined(CONFIG_USER_ONLY)
 690    cpu_list_lock();
 691#endif
 692    env->next_cpu = NULL;
 693    penv = &first_cpu;
 694    cpu_index = 0;
 695    while (*penv != NULL) {
 696        penv = &(*penv)->next_cpu;
 697        cpu_index++;
 698    }
 699    env->cpu_index = cpu_index;
 700    env->numa_node = 0;
 701    QTAILQ_INIT(&env->breakpoints);
 702    QTAILQ_INIT(&env->watchpoints);
 703#ifndef CONFIG_USER_ONLY
 704    env->thread_id = qemu_get_thread_id();
 705#endif
 706    *penv = env;
 707#if defined(CONFIG_USER_ONLY)
 708    cpu_list_unlock();
 709#endif
 710#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 711    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
 712    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 713                    cpu_save, cpu_load, env);
 714#endif
 715}
 716
 717/* Allocate a new translation block. Flush the translation buffer if
 718   too many translation blocks or too much generated code. */
 719static TranslationBlock *tb_alloc(target_ulong pc)
 720{
 721    TranslationBlock *tb;
 722
 723    if (nb_tbs >= code_gen_max_blocks ||
 724        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
 725        return NULL;
 726    tb = &tbs[nb_tbs++];
 727    tb->pc = pc;
 728    tb->cflags = 0;
 729    return tb;
 730}
 731
 732void tb_free(TranslationBlock *tb)
 733{
  734    /* In practice this is mostly used for single-use temporary TBs.
  735       Ignore the hard cases and just back up if this TB happens to
  736       be the last one generated.  */
 737    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
 738        code_gen_ptr = tb->tc_ptr;
 739        nb_tbs--;
 740    }
 741}
 742
 743static inline void invalidate_page_bitmap(PageDesc *p)
 744{
 745    if (p->code_bitmap) {
 746        g_free(p->code_bitmap);
 747        p->code_bitmap = NULL;
 748    }
 749    p->code_write_count = 0;
 750}
 751
 752/* Set to NULL all the 'first_tb' fields in all PageDescs. */
 753
 754static void page_flush_tb_1 (int level, void **lp)
 755{
 756    int i;
 757
 758    if (*lp == NULL) {
 759        return;
 760    }
 761    if (level == 0) {
 762        PageDesc *pd = *lp;
 763        for (i = 0; i < L2_SIZE; ++i) {
 764            pd[i].first_tb = NULL;
 765            invalidate_page_bitmap(pd + i);
 766        }
 767    } else {
 768        void **pp = *lp;
 769        for (i = 0; i < L2_SIZE; ++i) {
 770            page_flush_tb_1 (level - 1, pp + i);
 771        }
 772    }
 773}
 774
 775static void page_flush_tb(void)
 776{
 777    int i;
 778    for (i = 0; i < V_L1_SIZE; i++) {
 779        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
 780    }
 781}
 782
 783/* flush all the translation blocks */
 784/* XXX: tb_flush is currently not thread safe */
 785void tb_flush(CPUArchState *env1)
 786{
 787    CPUArchState *env;
 788#if defined(DEBUG_FLUSH)
 789    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
 790           (unsigned long)(code_gen_ptr - code_gen_buffer),
 791           nb_tbs, nb_tbs > 0 ?
 792           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
 793#endif
 794    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
 795        cpu_abort(env1, "Internal error: code buffer overflow\n");
 796
 797    nb_tbs = 0;
 798
 799    for(env = first_cpu; env != NULL; env = env->next_cpu) {
 800        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
 801    }
 802
 803    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
 804    page_flush_tb();
 805
 806    code_gen_ptr = code_gen_buffer;
 807    /* XXX: flush processor icache at this point if cache flush is
 808       expensive */
 809    tb_flush_count++;
 810}
 811
 812#ifdef DEBUG_TB_CHECK
 813
 814static void tb_invalidate_check(target_ulong address)
 815{
 816    TranslationBlock *tb;
 817    int i;
 818    address &= TARGET_PAGE_MASK;
 819    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
 820        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
 821            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
 822                  address >= tb->pc + tb->size)) {
 823                printf("ERROR invalidate: address=" TARGET_FMT_lx
 824                       " PC=%08lx size=%04x\n",
 825                       address, (long)tb->pc, tb->size);
 826            }
 827        }
 828    }
 829}
 830
 831/* verify that all the pages have correct rights for code */
 832static void tb_page_check(void)
 833{
 834    TranslationBlock *tb;
 835    int i, flags1, flags2;
 836
 837    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
 838        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
 839            flags1 = page_get_flags(tb->pc);
 840            flags2 = page_get_flags(tb->pc + tb->size - 1);
 841            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
 842                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
 843                       (long)tb->pc, tb->size, flags1, flags2);
 844            }
 845        }
 846    }
 847}
 848
 849#endif
 850
 851/* invalidate one TB */
 852static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
 853                             int next_offset)
 854{
 855    TranslationBlock *tb1;
 856    for(;;) {
 857        tb1 = *ptb;
 858        if (tb1 == tb) {
 859            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
 860            break;
 861        }
 862        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
 863    }
 864}
 865
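/* TBs on a page list are linked through page_next[0] or page_next[1],
   depending on which of the TB's two pages the list belongs to; the slot
   number is kept in the two low bits of each list pointer. */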
 866static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
 867{
 868    TranslationBlock *tb1;
 869    unsigned int n1;
 870
 871    for(;;) {
 872        tb1 = *ptb;
 873        n1 = (uintptr_t)tb1 & 3;
 874        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 875        if (tb1 == tb) {
 876            *ptb = tb1->page_next[n1];
 877            break;
 878        }
 879        ptb = &tb1->page_next[n1];
 880    }
 881}
 882
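/* Incoming jumps to a TB form a circular list threaded through the
   jmp_next[] slots of the jumping TBs; the target's jmp_first heads the
   list and the chain ends with the target itself tagged with 2 in the low
   bits.  tb_jmp_remove() unlinks entry 'n' of 'tb' from the list of the TB
   it currently jumps to. */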
 883static inline void tb_jmp_remove(TranslationBlock *tb, int n)
 884{
 885    TranslationBlock *tb1, **ptb;
 886    unsigned int n1;
 887
 888    ptb = &tb->jmp_next[n];
 889    tb1 = *ptb;
 890    if (tb1) {
 891        /* find tb(n) in circular list */
 892        for(;;) {
 893            tb1 = *ptb;
 894            n1 = (uintptr_t)tb1 & 3;
 895            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 896            if (n1 == n && tb1 == tb)
 897                break;
 898            if (n1 == 2) {
 899                ptb = &tb1->jmp_first;
 900            } else {
 901                ptb = &tb1->jmp_next[n1];
 902            }
 903        }
  904        /* now we can remove tb(n) from the list */
 905        *ptb = tb->jmp_next[n];
 906
 907        tb->jmp_next[n] = NULL;
 908    }
 909}
 910
 911/* reset the jump entry 'n' of a TB so that it is not chained to
 912   another TB */
 913static inline void tb_reset_jump(TranslationBlock *tb, int n)
 914{
 915    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
 916}
 917
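/* Remove 'tb' from every tracking structure: the physical hash table, the
   per-page TB lists, each CPU's tb_jmp_cache and the jump chains of any TB
   that branches to it.  'page_addr' is the page the caller is already
   clearing (or -1), so that page's list is left untouched here. */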
 918void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 919{
 920    CPUArchState *env;
 921    PageDesc *p;
 922    unsigned int h, n1;
 923    tb_page_addr_t phys_pc;
 924    TranslationBlock *tb1, *tb2;
 925
 926    /* remove the TB from the hash list */
 927    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
 928    h = tb_phys_hash_func(phys_pc);
 929    tb_remove(&tb_phys_hash[h], tb,
 930              offsetof(TranslationBlock, phys_hash_next));
 931
 932    /* remove the TB from the page list */
 933    if (tb->page_addr[0] != page_addr) {
 934        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 935        tb_page_remove(&p->first_tb, tb);
 936        invalidate_page_bitmap(p);
 937    }
 938    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
 939        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 940        tb_page_remove(&p->first_tb, tb);
 941        invalidate_page_bitmap(p);
 942    }
 943
 944    tb_invalidated_flag = 1;
 945
 946    /* remove the TB from the hash list */
 947    h = tb_jmp_cache_hash_func(tb->pc);
 948    for(env = first_cpu; env != NULL; env = env->next_cpu) {
 949        if (env->tb_jmp_cache[h] == tb)
 950            env->tb_jmp_cache[h] = NULL;
 951    }
 952
  953    /* remove this TB from the two jump lists */
 954    tb_jmp_remove(tb, 0);
 955    tb_jmp_remove(tb, 1);
 956
 957    /* suppress any remaining jumps to this TB */
 958    tb1 = tb->jmp_first;
 959    for(;;) {
 960        n1 = (uintptr_t)tb1 & 3;
 961        if (n1 == 2)
 962            break;
 963        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 964        tb2 = tb1->jmp_next[n1];
 965        tb_reset_jump(tb1, n1);
 966        tb1->jmp_next[n1] = NULL;
 967        tb1 = tb2;
 968    }
 969    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
 970
 971    tb_phys_invalidate_count++;
 972}
 973
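/* Set bits [start, start + len) in the bit array 'tab'. */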
 974static inline void set_bits(uint8_t *tab, int start, int len)
 975{
 976    int end, mask, end1;
 977
 978    end = start + len;
 979    tab += start >> 3;
 980    mask = 0xff << (start & 7);
 981    if ((start & ~7) == (end & ~7)) {
 982        if (start < end) {
 983            mask &= ~(0xff << (end & 7));
 984            *tab |= mask;
 985        }
 986    } else {
 987        *tab++ |= mask;
 988        start = (start + 8) & ~7;
 989        end1 = end & ~7;
 990        while (start < end1) {
 991            *tab++ = 0xff;
 992            start += 8;
 993        }
 994        if (start < end) {
 995            mask = ~(0xff << (end & 7));
 996            *tab |= mask;
 997        }
 998    }
 999}
1000
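/* Build the code bitmap of a page: one bit per byte that is covered by a
   translated block.  tb_invalidate_phys_page_fast() consults it so that
   small writes which do not hit translated code can skip the invalidation. */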
1001static void build_page_bitmap(PageDesc *p)
1002{
1003    int n, tb_start, tb_end;
1004    TranslationBlock *tb;
1005
1006    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1007
1008    tb = p->first_tb;
1009    while (tb != NULL) {
1010        n = (uintptr_t)tb & 3;
1011        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1012        /* NOTE: this is subtle as a TB may span two physical pages */
1013        if (n == 0) {
1014            /* NOTE: tb_end may be after the end of the page, but
1015               it is not a problem */
1016            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1017            tb_end = tb_start + tb->size;
1018            if (tb_end > TARGET_PAGE_SIZE)
1019                tb_end = TARGET_PAGE_SIZE;
1020        } else {
1021            tb_start = 0;
1022            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1023        }
1024        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1025        tb = tb->page_next[n];
1026    }
1027}
1028
1029TranslationBlock *tb_gen_code(CPUArchState *env,
1030                              target_ulong pc, target_ulong cs_base,
1031                              int flags, int cflags)
1032{
1033    TranslationBlock *tb;
1034    uint8_t *tc_ptr;
1035    tb_page_addr_t phys_pc, phys_page2;
1036    target_ulong virt_page2;
1037    int code_gen_size;
1038
1039    phys_pc = get_page_addr_code(env, pc);
1040    tb = tb_alloc(pc);
1041    if (!tb) {
1042        /* flush must be done */
1043        tb_flush(env);
1044        /* cannot fail at this point */
1045        tb = tb_alloc(pc);
1046        /* Don't forget to invalidate previous TB info.  */
1047        tb_invalidated_flag = 1;
1048    }
1049    tc_ptr = code_gen_ptr;
1050    tb->tc_ptr = tc_ptr;
1051    tb->cs_base = cs_base;
1052    tb->flags = flags;
1053    tb->cflags = cflags;
1054    cpu_gen_code(env, tb, &code_gen_size);
1055    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1056                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1057
1058    /* check next page if needed */
1059    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1060    phys_page2 = -1;
1061    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1062        phys_page2 = get_page_addr_code(env, virt_page2);
1063    }
1064    tb_link_page(tb, phys_pc, phys_page2);
1065    return tb;
1066}
1067
1068/*
1069 * Invalidate all TBs which intersect with the target physical address range
 1070 * [start, end). NOTE: start and end may refer to *different* physical pages.
1071 * 'is_cpu_write_access' should be true if called from a real cpu write
1072 * access: the virtual CPU will exit the current TB if code is modified inside
1073 * this TB.
1074 */
1075void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1076                              int is_cpu_write_access)
1077{
1078    while (start < end) {
1079        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1080        start &= TARGET_PAGE_MASK;
1081        start += TARGET_PAGE_SIZE;
1082    }
1083}
1084
1085/*
1086 * Invalidate all TBs which intersect with the target physical address range
 1087 * [start, end). NOTE: start and end must refer to the *same* physical page.
1088 * 'is_cpu_write_access' should be true if called from a real cpu write
1089 * access: the virtual CPU will exit the current TB if code is modified inside
1090 * this TB.
1091 */
1092void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1093                                   int is_cpu_write_access)
1094{
1095    TranslationBlock *tb, *tb_next, *saved_tb;
1096    CPUArchState *env = cpu_single_env;
1097    tb_page_addr_t tb_start, tb_end;
1098    PageDesc *p;
1099    int n;
1100#ifdef TARGET_HAS_PRECISE_SMC
1101    int current_tb_not_found = is_cpu_write_access;
1102    TranslationBlock *current_tb = NULL;
1103    int current_tb_modified = 0;
1104    target_ulong current_pc = 0;
1105    target_ulong current_cs_base = 0;
1106    int current_flags = 0;
1107#endif /* TARGET_HAS_PRECISE_SMC */
1108
1109    p = page_find(start >> TARGET_PAGE_BITS);
1110    if (!p)
1111        return;
1112    if (!p->code_bitmap &&
1113        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1114        is_cpu_write_access) {
1115        /* build code bitmap */
1116        build_page_bitmap(p);
1117    }
1118
 1119    /* we remove all the TBs in the range [start, end) */
1120    /* XXX: see if in some cases it could be faster to invalidate all the code */
1121    tb = p->first_tb;
1122    while (tb != NULL) {
1123        n = (uintptr_t)tb & 3;
1124        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1125        tb_next = tb->page_next[n];
1126        /* NOTE: this is subtle as a TB may span two physical pages */
1127        if (n == 0) {
1128            /* NOTE: tb_end may be after the end of the page, but
1129               it is not a problem */
1130            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1131            tb_end = tb_start + tb->size;
1132        } else {
1133            tb_start = tb->page_addr[1];
1134            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1135        }
1136        if (!(tb_end <= start || tb_start >= end)) {
1137#ifdef TARGET_HAS_PRECISE_SMC
1138            if (current_tb_not_found) {
1139                current_tb_not_found = 0;
1140                current_tb = NULL;
1141                if (env->mem_io_pc) {
1142                    /* now we have a real cpu fault */
1143                    current_tb = tb_find_pc(env->mem_io_pc);
1144                }
1145            }
1146            if (current_tb == tb &&
1147                (current_tb->cflags & CF_COUNT_MASK) != 1) {
1148                /* If we are modifying the current TB, we must stop
1149                its execution. We could be more precise by checking
1150                that the modification is after the current PC, but it
1151                would require a specialized function to partially
1152                restore the CPU state */
1153
1154                current_tb_modified = 1;
1155                cpu_restore_state(current_tb, env, env->mem_io_pc);
1156                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1157                                     &current_flags);
1158            }
1159#endif /* TARGET_HAS_PRECISE_SMC */
1160            /* we need to do that to handle the case where a signal
1161               occurs while doing tb_phys_invalidate() */
1162            saved_tb = NULL;
1163            if (env) {
1164                saved_tb = env->current_tb;
1165                env->current_tb = NULL;
1166            }
1167            tb_phys_invalidate(tb, -1);
1168            if (env) {
1169                env->current_tb = saved_tb;
1170                if (env->interrupt_request && env->current_tb)
1171                    cpu_interrupt(env, env->interrupt_request);
1172            }
1173        }
1174        tb = tb_next;
1175    }
1176#if !defined(CONFIG_USER_ONLY)
1177    /* if no code remaining, no need to continue to use slow writes */
1178    if (!p->first_tb) {
1179        invalidate_page_bitmap(p);
1180        if (is_cpu_write_access) {
1181            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1182        }
1183    }
1184#endif
1185#ifdef TARGET_HAS_PRECISE_SMC
1186    if (current_tb_modified) {
1187        /* we generate a block containing just the instruction
1188           modifying the memory. It will ensure that it cannot modify
1189           itself */
1190        env->current_tb = NULL;
1191        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1192        cpu_resume_from_signal(env, NULL);
1193    }
1194#endif
1195}
1196
1197/* len must be <= 8 and start must be a multiple of len */
1198static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1199{
1200    PageDesc *p;
1201    int offset, b;
1202#if 0
1203    if (1) {
1204        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1205                  cpu_single_env->mem_io_vaddr, len,
1206                  cpu_single_env->eip,
1207                  cpu_single_env->eip +
1208                  (intptr_t)cpu_single_env->segs[R_CS].base);
1209    }
1210#endif
1211    p = page_find(start >> TARGET_PAGE_BITS);
1212    if (!p)
1213        return;
1214    if (p->code_bitmap) {
1215        offset = start & ~TARGET_PAGE_MASK;
1216        b = p->code_bitmap[offset >> 3] >> (offset & 7);
1217        if (b & ((1 << len) - 1))
1218            goto do_invalidate;
1219    } else {
1220    do_invalidate:
1221        tb_invalidate_phys_page_range(start, start + len, 1);
1222    }
1223}
1224
1225#if !defined(CONFIG_SOFTMMU)
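/* User-mode only: invalidate every TB on the page containing 'addr' after a
   write to that page has been detected.  'pc' (when non-zero) and 'puc' come
   from the signal context, so the current TB can be regenerated and execution
   safely resumed. */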
1226static void tb_invalidate_phys_page(tb_page_addr_t addr,
1227                                    uintptr_t pc, void *puc)
1228{
1229    TranslationBlock *tb;
1230    PageDesc *p;
1231    int n;
1232#ifdef TARGET_HAS_PRECISE_SMC
1233    TranslationBlock *current_tb = NULL;
1234    CPUArchState *env = cpu_single_env;
1235    int current_tb_modified = 0;
1236    target_ulong current_pc = 0;
1237    target_ulong current_cs_base = 0;
1238    int current_flags = 0;
1239#endif
1240
1241    addr &= TARGET_PAGE_MASK;
1242    p = page_find(addr >> TARGET_PAGE_BITS);
1243    if (!p)
1244        return;
1245    tb = p->first_tb;
1246#ifdef TARGET_HAS_PRECISE_SMC
1247    if (tb && pc != 0) {
1248        current_tb = tb_find_pc(pc);
1249    }
1250#endif
1251    while (tb != NULL) {
1252        n = (uintptr_t)tb & 3;
1253        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1254#ifdef TARGET_HAS_PRECISE_SMC
1255        if (current_tb == tb &&
1256            (current_tb->cflags & CF_COUNT_MASK) != 1) {
1257                /* If we are modifying the current TB, we must stop
1258                   its execution. We could be more precise by checking
1259                   that the modification is after the current PC, but it
1260                   would require a specialized function to partially
1261                   restore the CPU state */
1262
1263            current_tb_modified = 1;
1264            cpu_restore_state(current_tb, env, pc);
1265            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1266                                 &current_flags);
1267        }
1268#endif /* TARGET_HAS_PRECISE_SMC */
1269        tb_phys_invalidate(tb, addr);
1270        tb = tb->page_next[n];
1271    }
1272    p->first_tb = NULL;
1273#ifdef TARGET_HAS_PRECISE_SMC
1274    if (current_tb_modified) {
1275        /* we generate a block containing just the instruction
1276           modifying the memory. It will ensure that it cannot modify
1277           itself */
1278        env->current_tb = NULL;
1279        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1280        cpu_resume_from_signal(env, puc);
1281    }
1282#endif
1283}
1284#endif
1285
1286/* add the tb in the target page and protect it if necessary */
1287static inline void tb_alloc_page(TranslationBlock *tb,
1288                                 unsigned int n, tb_page_addr_t page_addr)
1289{
1290    PageDesc *p;
1291#ifndef CONFIG_USER_ONLY
1292    bool page_already_protected;
1293#endif
1294
1295    tb->page_addr[n] = page_addr;
1296    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1297    tb->page_next[n] = p->first_tb;
1298#ifndef CONFIG_USER_ONLY
1299    page_already_protected = p->first_tb != NULL;
1300#endif
1301    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1302    invalidate_page_bitmap(p);
1303
1304#if defined(TARGET_HAS_SMC) || 1
1305
1306#if defined(CONFIG_USER_ONLY)
1307    if (p->flags & PAGE_WRITE) {
1308        target_ulong addr;
1309        PageDesc *p2;
1310        int prot;
1311
 1312        /* force the host page to be non-writable (writes will incur a
 1313           page fault + mprotect overhead) */
1314        page_addr &= qemu_host_page_mask;
1315        prot = 0;
1316        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1317            addr += TARGET_PAGE_SIZE) {
1318
1319            p2 = page_find (addr >> TARGET_PAGE_BITS);
1320            if (!p2)
1321                continue;
1322            prot |= p2->flags;
1323            p2->flags &= ~PAGE_WRITE;
1324          }
1325        mprotect(g2h(page_addr), qemu_host_page_size,
1326                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1327#ifdef DEBUG_TB_INVALIDATE
1328        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1329               page_addr);
1330#endif
1331    }
1332#else
1333    /* if some code is already present, then the pages are already
1334       protected. So we handle the case where only the first TB is
1335       allocated in a physical page */
1336    if (!page_already_protected) {
1337        tlb_protect_code(page_addr);
1338    }
1339#endif
1340
1341#endif /* TARGET_HAS_SMC */
1342}
1343
1344/* add a new TB and link it to the physical page tables. phys_page2 is
1345   (-1) to indicate that only one page contains the TB. */
1346void tb_link_page(TranslationBlock *tb,
1347                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1348{
1349    unsigned int h;
1350    TranslationBlock **ptb;
1351
1352    /* Grab the mmap lock to stop another thread invalidating this TB
1353       before we are done.  */
1354    mmap_lock();
1355    /* add in the physical hash table */
1356    h = tb_phys_hash_func(phys_pc);
1357    ptb = &tb_phys_hash[h];
1358    tb->phys_hash_next = *ptb;
1359    *ptb = tb;
1360
1361    /* add in the page list */
1362    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1363    if (phys_page2 != -1)
1364        tb_alloc_page(tb, 1, phys_page2);
1365    else
1366        tb->page_addr[1] = -1;
1367
1368    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1369    tb->jmp_next[0] = NULL;
1370    tb->jmp_next[1] = NULL;
1371
1372    /* init original jump addresses */
1373    if (tb->tb_next_offset[0] != 0xffff)
1374        tb_reset_jump(tb, 0);
1375    if (tb->tb_next_offset[1] != 0xffff)
1376        tb_reset_jump(tb, 1);
1377
1378#ifdef DEBUG_TB_CHECK
1379    tb_page_check();
1380#endif
1381    mmap_unlock();
1382}
1383
1384/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1385   tb[1].tc_ptr. Return NULL if not found */
1386TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1387{
1388    int m_min, m_max, m;
1389    uintptr_t v;
1390    TranslationBlock *tb;
1391
1392    if (nb_tbs <= 0)
1393        return NULL;
1394    if (tc_ptr < (uintptr_t)code_gen_buffer ||
1395        tc_ptr >= (uintptr_t)code_gen_ptr) {
1396        return NULL;
1397    }
1398    /* binary search (cf Knuth) */
1399    m_min = 0;
1400    m_max = nb_tbs - 1;
1401    while (m_min <= m_max) {
1402        m = (m_min + m_max) >> 1;
1403        tb = &tbs[m];
1404        v = (uintptr_t)tb->tc_ptr;
1405        if (v == tc_ptr)
1406            return tb;
1407        else if (tc_ptr < v) {
1408            m_max = m - 1;
1409        } else {
1410            m_min = m + 1;
1411        }
1412    }
1413    return &tbs[m_max];
1414}
1415
1416static void tb_reset_jump_recursive(TranslationBlock *tb);
1417
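/* Undo the chaining of jump slot 'n' of 'tb': find the TB it currently
   jumps to, unlink 'tb' from that TB's incoming-jump list, patch the
   generated code back to its unchained form, and recurse into the former
   jump target. */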
1418static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1419{
1420    TranslationBlock *tb1, *tb_next, **ptb;
1421    unsigned int n1;
1422
1423    tb1 = tb->jmp_next[n];
1424    if (tb1 != NULL) {
1425        /* find head of list */
1426        for(;;) {
1427            n1 = (uintptr_t)tb1 & 3;
1428            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1429            if (n1 == 2)
1430                break;
1431            tb1 = tb1->jmp_next[n1];
1432        }
 1433        /* we are now sure that tb jumps to tb1 */
1434        tb_next = tb1;
1435
1436        /* remove tb from the jmp_first list */
1437        ptb = &tb_next->jmp_first;
1438        for(;;) {
1439            tb1 = *ptb;
1440            n1 = (uintptr_t)tb1 & 3;
1441            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1442            if (n1 == n && tb1 == tb)
1443                break;
1444            ptb = &tb1->jmp_next[n1];
1445        }
1446        *ptb = tb->jmp_next[n];
1447        tb->jmp_next[n] = NULL;
1448
 1449        /* remove the jump to the next tb from the generated code */
1450        tb_reset_jump(tb, n);
1451
 1452        /* also reset the jumps of the tb we could have jumped to */
1453        tb_reset_jump_recursive(tb_next);
1454    }
1455}
1456
1457static void tb_reset_jump_recursive(TranslationBlock *tb)
1458{
1459    tb_reset_jump_recursive2(tb, 0);
1460    tb_reset_jump_recursive2(tb, 1);
1461}
1462
1463#if defined(TARGET_HAS_ICE)
1464#if defined(CONFIG_USER_ONLY)
1465static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1466{
1467    tb_invalidate_phys_page_range(pc, pc + 1, 0);
1468}
1469#else
1470void tb_invalidate_phys_addr(target_phys_addr_t addr)
1471{
1472    ram_addr_t ram_addr;
1473    MemoryRegionSection *section;
1474
1475    section = phys_page_find(addr >> TARGET_PAGE_BITS);
1476    if (!(memory_region_is_ram(section->mr)
1477          || (section->mr->rom_device && section->mr->readable))) {
1478        return;
1479    }
1480    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1481        + memory_region_section_addr(section, addr);
1482    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1483}
1484
1485static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1486{
1487    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1488            (pc & ~TARGET_PAGE_MASK));
1489}
1490#endif
1491#endif /* TARGET_HAS_ICE */
1492
1493#if defined(CONFIG_USER_ONLY)
1494void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1495
1496{
1497}
1498
1499int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1500                          int flags, CPUWatchpoint **watchpoint)
1501{
1502    return -ENOSYS;
1503}
1504#else
1505/* Add a watchpoint.  */
1506int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1507                          int flags, CPUWatchpoint **watchpoint)
1508{
1509    target_ulong len_mask = ~(len - 1);
1510    CPUWatchpoint *wp;
1511
1512    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1513    if ((len & (len - 1)) || (addr & ~len_mask) ||
1514            len == 0 || len > TARGET_PAGE_SIZE) {
1515        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1516                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1517        return -EINVAL;
1518    }
1519    wp = g_malloc(sizeof(*wp));
1520
1521    wp->vaddr = addr;
1522    wp->len_mask = len_mask;
1523    wp->flags = flags;
1524
1525    /* keep all GDB-injected watchpoints in front */
1526    if (flags & BP_GDB)
1527        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1528    else
1529        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1530
1531    tlb_flush_page(env, addr);
1532
1533    if (watchpoint)
1534        *watchpoint = wp;
1535    return 0;
1536}
1537
1538/* Remove a specific watchpoint.  */
1539int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1540                          int flags)
1541{
1542    target_ulong len_mask = ~(len - 1);
1543    CPUWatchpoint *wp;
1544
1545    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1546        if (addr == wp->vaddr && len_mask == wp->len_mask
1547                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1548            cpu_watchpoint_remove_by_ref(env, wp);
1549            return 0;
1550        }
1551    }
1552    return -ENOENT;
1553}
1554
1555/* Remove a specific watchpoint by reference.  */
1556void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1557{
1558    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1559
1560    tlb_flush_page(env, watchpoint->vaddr);
1561
1562    g_free(watchpoint);
1563}
1564
1565/* Remove all matching watchpoints.  */
1566void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1567{
1568    CPUWatchpoint *wp, *next;
1569
1570    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1571        if (wp->flags & mask)
1572            cpu_watchpoint_remove_by_ref(env, wp);
1573    }
1574}
1575#endif
1576
1577/* Add a breakpoint.  */
1578int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1579                          CPUBreakpoint **breakpoint)
1580{
1581#if defined(TARGET_HAS_ICE)
1582    CPUBreakpoint *bp;
1583
1584    bp = g_malloc(sizeof(*bp));
1585
1586    bp->pc = pc;
1587    bp->flags = flags;
1588
1589    /* keep all GDB-injected breakpoints in front */
1590    if (flags & BP_GDB)
1591        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1592    else
1593        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1594
1595    breakpoint_invalidate(env, pc);
1596
1597    if (breakpoint)
1598        *breakpoint = bp;
1599    return 0;
1600#else
1601    return -ENOSYS;
1602#endif
1603}
1604
1605/* Remove a specific breakpoint.  */
1606int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1607{
1608#if defined(TARGET_HAS_ICE)
1609    CPUBreakpoint *bp;
1610
1611    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1612        if (bp->pc == pc && bp->flags == flags) {
1613            cpu_breakpoint_remove_by_ref(env, bp);
1614            return 0;
1615        }
1616    }
1617    return -ENOENT;
1618#else
1619    return -ENOSYS;
1620#endif
1621}
1622
1623/* Remove a specific breakpoint by reference.  */
1624void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1625{
1626#if defined(TARGET_HAS_ICE)
1627    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1628
1629    breakpoint_invalidate(env, breakpoint->pc);
1630
1631    g_free(breakpoint);
1632#endif
1633}
1634
1635/* Remove all matching breakpoints. */
1636void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1637{
1638#if defined(TARGET_HAS_ICE)
1639    CPUBreakpoint *bp, *next;
1640
1641    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1642        if (bp->flags & mask)
1643            cpu_breakpoint_remove_by_ref(env, bp);
1644    }
1645#endif
1646}
1647
1648/* enable or disable single step mode. EXCP_DEBUG is returned by the
1649   CPU loop after each instruction */
1650void cpu_single_step(CPUArchState *env, int enabled)
1651{
1652#if defined(TARGET_HAS_ICE)
1653    if (env->singlestep_enabled != enabled) {
1654        env->singlestep_enabled = enabled;
1655        if (kvm_enabled())
1656            kvm_update_guest_debug(env, 0);
1657        else {
1658            /* must flush all the translated code to avoid inconsistencies */
1659            /* XXX: only flush what is necessary */
1660            tb_flush(env);
1661        }
1662    }
1663#endif
1664}
1665
1666static void cpu_unlink_tb(CPUArchState *env)
1667{
1668    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
1669       problem and hope the cpu will stop of its own accord.  For userspace
1670       emulation this often isn't actually as bad as it sounds.  Often
1671       signals are used primarily to interrupt blocking syscalls.  */
1672    TranslationBlock *tb;
1673    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1674
1675    spin_lock(&interrupt_lock);
1676    tb = env->current_tb;
 1677    /* if the cpu is currently executing code, we must unlink it and
 1678       all the potentially executing TBs */
1679    if (tb) {
1680        env->current_tb = NULL;
1681        tb_reset_jump_recursive(tb);
1682    }
1683    spin_unlock(&interrupt_lock);
1684}
1685
1686#ifndef CONFIG_USER_ONLY
1687/* mask must never be zero, except for A20 change call */
1688static void tcg_handle_interrupt(CPUArchState *env, int mask)
1689{
1690    int old_mask;
1691
1692    old_mask = env->interrupt_request;
1693    env->interrupt_request |= mask;
1694
1695    /*
1696     * If called from iothread context, wake the target cpu in
 1697     * case it is halted.
1698     */
1699    if (!qemu_cpu_is_self(env)) {
1700        qemu_cpu_kick(env);
1701        return;
1702    }
1703
1704    if (use_icount) {
1705        env->icount_decr.u16.high = 0xffff;
1706        if (!can_do_io(env)
1707            && (mask & ~old_mask) != 0) {
1708            cpu_abort(env, "Raised interrupt while not in I/O function");
1709        }
1710    } else {
1711        cpu_unlink_tb(env);
1712    }
1713}
1714
1715CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1716
1717#else /* CONFIG_USER_ONLY */
1718
1719void cpu_interrupt(CPUArchState *env, int mask)
1720{
1721    env->interrupt_request |= mask;
1722    cpu_unlink_tb(env);
1723}
1724#endif /* CONFIG_USER_ONLY */
1725
1726void cpu_reset_interrupt(CPUArchState *env, int mask)
1727{
1728    env->interrupt_request &= ~mask;
1729}
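
    /*
     * Hedged usage sketch: an interrupt controller model typically asserts
     * and later clears a CPU interrupt line with this pair (the flag is
     * target specific, CPU_INTERRUPT_HARD is just the common example):
     *
     *     cpu_interrupt(env, CPU_INTERRUPT_HARD);
     *     ...
     *     cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
     */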
1730
1731void cpu_exit(CPUArchState *env)
1732{
1733    env->exit_request = 1;
1734    cpu_unlink_tb(env);
1735}
1736
1737void cpu_abort(CPUArchState *env, const char *fmt, ...)
1738{
1739    va_list ap;
1740    va_list ap2;
1741
1742    va_start(ap, fmt);
1743    va_copy(ap2, ap);
1744    fprintf(stderr, "qemu: fatal: ");
1745    vfprintf(stderr, fmt, ap);
1746    fprintf(stderr, "\n");
1747#ifdef TARGET_I386
1748    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1749#else
1750    cpu_dump_state(env, stderr, fprintf, 0);
1751#endif
1752    if (qemu_log_enabled()) {
1753        qemu_log("qemu: fatal: ");
1754        qemu_log_vprintf(fmt, ap2);
1755        qemu_log("\n");
1756#ifdef TARGET_I386
1757        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1758#else
1759        log_cpu_state(env, 0);
1760#endif
1761        qemu_log_flush();
1762        qemu_log_close();
1763    }
1764    va_end(ap2);
1765    va_end(ap);
1766#if defined(CONFIG_USER_ONLY)
1767    {
1768        struct sigaction act;
1769        sigfillset(&act.sa_mask);
1770        act.sa_handler = SIG_DFL;
1771        sigaction(SIGABRT, &act, NULL);
1772    }
1773#endif
1774    abort();
1775}
1776
1777CPUArchState *cpu_copy(CPUArchState *env)
1778{
1779    CPUArchState *new_env = cpu_init(env->cpu_model_str);
1780    CPUArchState *next_cpu = new_env->next_cpu;
1781    int cpu_index = new_env->cpu_index;
1782#if defined(TARGET_HAS_ICE)
1783    CPUBreakpoint *bp;
1784    CPUWatchpoint *wp;
1785#endif
1786
1787    memcpy(new_env, env, sizeof(CPUArchState));
1788
1789    /* Preserve chaining and index. */
1790    new_env->next_cpu = next_cpu;
1791    new_env->cpu_index = cpu_index;
1792
1793    /* Clone all break/watchpoints.
1794       Note: Once we support ptrace with hw-debug register access, make sure
1795       BP_CPU break/watchpoints are handled correctly on clone. */
1796    QTAILQ_INIT(&new_env->breakpoints);
1797    QTAILQ_INIT(&new_env->watchpoints);
1798#if defined(TARGET_HAS_ICE)
1799    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1800        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1801    }
1802    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1803        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1804                              wp->flags, NULL);
1805    }
1806#endif
1807
1808    return new_env;
1809}
1810
1811#if !defined(CONFIG_USER_ONLY)
1812void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1813{
1814    unsigned int i;
1815
1816    /* Discard jump cache entries for any tb which might overlap the
1817       flushed page, including TBs that start on the preceding page.  */
1818    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1819    memset(&env->tb_jmp_cache[i], 0,
1820           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1821
1822    i = tb_jmp_cache_hash_page(addr);
1823    memset(&env->tb_jmp_cache[i], 0,
1824           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1825}
1826
1827static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1828                                      uintptr_t length)
1829{
1830    uintptr_t start1;
1831
1832    /* we modify the TLB cache so that the dirty bit will be set again
1833       when accessing the range */
1834    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1835    /* Check that we don't span multiple blocks - this breaks the
1836       address comparisons below.  */
1837    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1838            != (end - 1) - start) {
1839        abort();
1840    }
1841    cpu_tlb_reset_dirty_all(start1, length);
1842
1843}
1844
1845/* Note: start and end must be within the same ram block.  */
1846void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1847                                     int dirty_flags)
1848{
1849    uintptr_t length;
1850
1851    start &= TARGET_PAGE_MASK;
1852    end = TARGET_PAGE_ALIGN(end);
1853
1854    length = end - start;
1855    if (length == 0)
1856        return;
1857    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1858
1859    if (tcg_enabled()) {
1860        tlb_reset_dirty_range_all(start, end, length);
1861    }
1862}
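
    /*
     * Hedged sketch of the consumer side: a dirty page scanner (migration,
     * VGA refresh, ...) copies a page and then clears its dirty state so
     * that further guest writes are tracked again, e.g. for the migration
     * flag:
     *
     *     cpu_physical_memory_reset_dirty(addr, addr + TARGET_PAGE_SIZE,
     *                                     MIGRATION_DIRTY_FLAG);
     */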
1863
1864int cpu_physical_memory_set_dirty_tracking(int enable)
1865{
1866    int ret = 0;
1867    in_migration = enable;
1868    return ret;
1869}
1870
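    /*
     * Compute the value stored in a TLB entry's iotlb field for this
     * mapping.  In every case the low bits (below TARGET_PAGE_SIZE) hold a
     * phys_sections index: one of the special notdirty/rom/watch sections
     * for RAM and watched pages, or the section's own index for MMIO; for
     * RAM the page's ram address is kept in the upper bits as well.
     * iotlb_to_region() further down recovers the MemoryRegion from it,
     * roughly:
     *
     *     MemoryRegion *mr = iotlb_to_region(iotlb);
     */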
1871target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1872                                                   MemoryRegionSection *section,
1873                                                   target_ulong vaddr,
1874                                                   target_phys_addr_t paddr,
1875                                                   int prot,
1876                                                   target_ulong *address)
1877{
1878    target_phys_addr_t iotlb;
1879    CPUWatchpoint *wp;
1880
1881    if (memory_region_is_ram(section->mr)) {
1882        /* Normal RAM.  */
1883        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1884            + memory_region_section_addr(section, paddr);
1885        if (!section->readonly) {
1886            iotlb |= phys_section_notdirty;
1887        } else {
1888            iotlb |= phys_section_rom;
1889        }
1890    } else {
1891        /* IO handlers are currently passed a physical address.
1892           It would be nice to pass an offset from the base address
1893           of that region.  This would avoid having to special case RAM,
1894           and avoid full address decoding in every device.
1895           We can't use the high bits of pd for this because
1896           IO_MEM_ROMD uses these as a ram address.  */
1897        iotlb = section - phys_sections;
1898        iotlb += memory_region_section_addr(section, paddr);
1899    }
1900
1901    /* Make accesses to pages with watchpoints go via the
1902       watchpoint trap routines.  */
1903    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1904        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1905            /* Avoid trapping reads of pages with a write breakpoint. */
1906            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1907                iotlb = phys_section_watch + paddr;
1908                *address |= TLB_MMIO;
1909                break;
1910            }
1911        }
1912    }
1913
1914    return iotlb;
1915}
1916
1917#else
1918/*
1919 * Walks guest process memory "regions" one by one
1920 * and calls callback function 'fn' for each region.
1921 */
1922
1923struct walk_memory_regions_data
1924{
1925    walk_memory_regions_fn fn;
1926    void *priv;
1927    uintptr_t start;
1928    int prot;
1929};
1930
1931static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1932                                   abi_ulong end, int new_prot)
1933{
1934    if (data->start != -1ul) {
1935        int rc = data->fn(data->priv, data->start, end, data->prot);
1936        if (rc != 0) {
1937            return rc;
1938        }
1939    }
1940
1941    data->start = (new_prot ? end : -1ul);
1942    data->prot = new_prot;
1943
1944    return 0;
1945}
1946
1947static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1948                                 abi_ulong base, int level, void **lp)
1949{
1950    abi_ulong pa;
1951    int i, rc;
1952
1953    if (*lp == NULL) {
1954        return walk_memory_regions_end(data, base, 0);
1955    }
1956
1957    if (level == 0) {
1958        PageDesc *pd = *lp;
1959        for (i = 0; i < L2_SIZE; ++i) {
1960            int prot = pd[i].flags;
1961
1962            pa = base | (i << TARGET_PAGE_BITS);
1963            if (prot != data->prot) {
1964                rc = walk_memory_regions_end(data, pa, prot);
1965                if (rc != 0) {
1966                    return rc;
1967                }
1968            }
1969        }
1970    } else {
1971        void **pp = *lp;
1972        for (i = 0; i < L2_SIZE; ++i) {
1973            pa = base | ((abi_ulong)i <<
1974                (TARGET_PAGE_BITS + L2_BITS * level));
1975            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1976            if (rc != 0) {
1977                return rc;
1978            }
1979        }
1980    }
1981
1982    return 0;
1983}
1984
1985int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1986{
1987    struct walk_memory_regions_data data;
1988    uintptr_t i;
1989
1990    data.fn = fn;
1991    data.priv = priv;
1992    data.start = -1ul;
1993    data.prot = 0;
1994
1995    for (i = 0; i < V_L1_SIZE; i++) {
1996        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1997                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1998        if (rc != 0) {
1999            return rc;
2000        }
2001    }
2002
2003    return walk_memory_regions_end(&data, 0, 0);
2004}
2005
2006static int dump_region(void *priv, abi_ulong start,
2007    abi_ulong end, unsigned long prot)
2008{
2009    FILE *f = (FILE *)priv;
2010
2011    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2012        " "TARGET_ABI_FMT_lx" %c%c%c\n",
2013        start, end, end - start,
2014        ((prot & PAGE_READ) ? 'r' : '-'),
2015        ((prot & PAGE_WRITE) ? 'w' : '-'),
2016        ((prot & PAGE_EXEC) ? 'x' : '-'));
2017
2018    return (0);
2019}
2020
2021/* dump memory mappings */
2022void page_dump(FILE *f)
2023{
2024    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2025            "start", "end", "size", "prot");
2026    walk_memory_regions(f, dump_region);
2027}
2028
2029int page_get_flags(target_ulong address)
2030{
2031    PageDesc *p;
2032
2033    p = page_find(address >> TARGET_PAGE_BITS);
2034    if (!p)
2035        return 0;
2036    return p->flags;
2037}
2038
2039/* Modify the flags of a page and invalidate the code if necessary.
2040   The flag PAGE_WRITE_ORG is set automatically depending
2041   on PAGE_WRITE.  The mmap_lock should already be held.  */
2042void page_set_flags(target_ulong start, target_ulong end, int flags)
2043{
2044    target_ulong addr, len;
2045
2046    /* This function should never be called with addresses outside the
2047       guest address space.  If this assert fires, it probably indicates
2048       a missing call to h2g_valid.  */
2049#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2050    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2051#endif
2052    assert(start < end);
2053
2054    start = start & TARGET_PAGE_MASK;
2055    end = TARGET_PAGE_ALIGN(end);
2056
2057    if (flags & PAGE_WRITE) {
2058        flags |= PAGE_WRITE_ORG;
2059    }
2060
2061    for (addr = start, len = end - start;
2062         len != 0;
2063         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2064        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2065
2066        /* If the page is becoming writable and contains translated
2067           code, invalidate that code.  */
2068        if (!(p->flags & PAGE_WRITE) &&
2069            (flags & PAGE_WRITE) &&
2070            p->first_tb) {
2071            tb_invalidate_phys_page(addr, 0, NULL);
2072        }
2073        p->flags = flags;
2074    }
2075}
2076
2077int page_check_range(target_ulong start, target_ulong len, int flags)
2078{
2079    PageDesc *p;
2080    target_ulong end;
2081    target_ulong addr;
2082
2083    /* This function should never be called with addresses outside the
2084       guest address space.  If this assert fires, it probably indicates
2085       a missing call to h2g_valid.  */
2086#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2087    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2088#endif
2089
2090    if (len == 0) {
2091        return 0;
2092    }
2093    if (start + len - 1 < start) {
2094        /* We've wrapped around.  */
2095        return -1;
2096    }
2097
2098    end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2099    start = start & TARGET_PAGE_MASK;
2100
2101    for (addr = start, len = end - start;
2102         len != 0;
2103         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2104        p = page_find(addr >> TARGET_PAGE_BITS);
2105        if (!p)
2106            return -1;
2107        if (!(p->flags & PAGE_VALID))
2108            return -1;
2109
2110        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2111            return -1;
2112        if (flags & PAGE_WRITE) {
2113            if (!(p->flags & PAGE_WRITE_ORG))
2114                return -1;
2115            /* unprotect the page if it was put read-only because it
2116               contains translated code */
2117            if (!(p->flags & PAGE_WRITE)) {
2118                if (!page_unprotect(addr, 0, NULL))
2119                    return -1;
2120            }
2121            return 0;
2122        }
2123    }
2124    return 0;
2125}
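
    /*
     * Hedged sketch of the typical caller: the user-mode syscall layer
     * validates guest buffers before touching them, along the lines of
     *
     *     if (page_check_range(guest_addr, len, PAGE_READ) != 0) {
     *         return -TARGET_EFAULT;
     *     }
     *
     * (access_ok() in the user-mode code builds on this check).
     */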
2126
2127/* called from signal handler: invalidate the code and unprotect the
2128   page. Return TRUE if the fault was successfully handled. */
2129int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2130{
2131    unsigned int prot;
2132    PageDesc *p;
2133    target_ulong host_start, host_end, addr;
2134
2135    /* Technically this isn't safe inside a signal handler.  However we
2136       know this only ever happens in a synchronous SEGV handler, so in
2137       practice it seems to be ok.  */
2138    mmap_lock();
2139
2140    p = page_find(address >> TARGET_PAGE_BITS);
2141    if (!p) {
2142        mmap_unlock();
2143        return 0;
2144    }
2145
2146    /* if the page was really writable, then we change its
2147       protection back to writable */
2148    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2149        host_start = address & qemu_host_page_mask;
2150        host_end = host_start + qemu_host_page_size;
2151
2152        prot = 0;
2153        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2154            p = page_find(addr >> TARGET_PAGE_BITS);
2155            p->flags |= PAGE_WRITE;
2156            prot |= p->flags;
2157
2158            /* and since the content will be modified, we must invalidate
2159               the corresponding translated code. */
2160            tb_invalidate_phys_page(addr, pc, puc);
2161#ifdef DEBUG_TB_CHECK
2162            tb_invalidate_check(addr);
2163#endif
2164        }
2165        mprotect((void *)g2h(host_start), qemu_host_page_size,
2166                 prot & PAGE_BITS);
2167
2168        mmap_unlock();
2169        return 1;
2170    }
2171    mmap_unlock();
2172    return 0;
2173}
2174#endif /* defined(CONFIG_USER_ONLY) */
2175
2176#if !defined(CONFIG_USER_ONLY)
2177
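    /*
     * When a target page is covered by more than one section (or by a
     * section that is not page aligned), its phys_map leaf points at a
     * subpage_t rather than a plain section: sub_section[] maps every byte
     * offset within the page to the phys_sections index that should handle
     * it, and subpage_read()/subpage_write() below forward each access to
     * that section.
     */
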
2178#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2179typedef struct subpage_t {
2180    MemoryRegion iomem;
2181    target_phys_addr_t base;
2182    uint16_t sub_section[TARGET_PAGE_SIZE];
2183} subpage_t;
2184
2185static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2186                             uint16_t section);
2187static subpage_t *subpage_init(target_phys_addr_t base);
2188static void destroy_page_desc(uint16_t section_index)
2189{
2190    MemoryRegionSection *section = &phys_sections[section_index];
2191    MemoryRegion *mr = section->mr;
2192
2193    if (mr->subpage) {
2194        subpage_t *subpage = container_of(mr, subpage_t, iomem);
2195        memory_region_destroy(&subpage->iomem);
2196        g_free(subpage);
2197    }
2198}
2199
2200static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2201{
2202    unsigned i;
2203    PhysPageEntry *p;
2204
2205    if (lp->ptr == PHYS_MAP_NODE_NIL) {
2206        return;
2207    }
2208
2209    p = phys_map_nodes[lp->ptr];
2210    for (i = 0; i < L2_SIZE; ++i) {
2211        if (!p[i].is_leaf) {
2212            destroy_l2_mapping(&p[i], level - 1);
2213        } else {
2214            destroy_page_desc(p[i].ptr);
2215        }
2216    }
2217    lp->is_leaf = 0;
2218    lp->ptr = PHYS_MAP_NODE_NIL;
2219}
2220
2221static void destroy_all_mappings(void)
2222{
2223    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2224    phys_map_nodes_reset();
2225}
2226
2227static uint16_t phys_section_add(MemoryRegionSection *section)
2228{
2229    if (phys_sections_nb == phys_sections_nb_alloc) {
2230        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2231        phys_sections = g_renew(MemoryRegionSection, phys_sections,
2232                                phys_sections_nb_alloc);
2233    }
2234    phys_sections[phys_sections_nb] = *section;
2235    return phys_sections_nb++;
2236}
2237
2238static void phys_sections_clear(void)
2239{
2240    phys_sections_nb = 0;
2241}
2242
2243static void register_subpage(MemoryRegionSection *section)
2244{
2245    subpage_t *subpage;
2246    target_phys_addr_t base = section->offset_within_address_space
2247        & TARGET_PAGE_MASK;
2248    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2249    MemoryRegionSection subsection = {
2250        .offset_within_address_space = base,
2251        .size = TARGET_PAGE_SIZE,
2252    };
2253    target_phys_addr_t start, end;
2254
2255    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2256
2257    if (!(existing->mr->subpage)) {
2258        subpage = subpage_init(base);
2259        subsection.mr = &subpage->iomem;
2260        phys_page_set(base >> TARGET_PAGE_BITS, 1,
2261                      phys_section_add(&subsection));
2262    } else {
2263        subpage = container_of(existing->mr, subpage_t, iomem);
2264    }
2265    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2266    end = start + section->size - 1;
2267    subpage_register(subpage, start, end, phys_section_add(section));
2268}
2269
2270
2271static void register_multipage(MemoryRegionSection *section)
2272{
2273    target_phys_addr_t start_addr = section->offset_within_address_space;
2274    ram_addr_t size = section->size;
2275    target_phys_addr_t addr;
2276    uint16_t section_index = phys_section_add(section);
2277
2278    assert(size);
2279
2280    addr = start_addr;
2281    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2282                  section_index);
2283}
2284
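    /*
     * Split a MemoryRegionSection into an unaligned head, a page aligned
     * middle and an unaligned tail; head and tail go through subpages, the
     * middle is registered as one multipage run.  Sketch with 4K pages: a
     * section covering 0x1800..0x41ff becomes a subpage entry for page
     * 0x1000, plain entries for pages 0x2000 and 0x3000, and a subpage
     * entry for page 0x4000.
     */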
2285void cpu_register_physical_memory_log(MemoryRegionSection *section,
2286                                      bool readonly)
2287{
2288    MemoryRegionSection now = *section, remain = *section;
2289
2290    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2291        || (now.size < TARGET_PAGE_SIZE)) {
2292        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2293                       - now.offset_within_address_space,
2294                       now.size);
2295        register_subpage(&now);
2296        remain.size -= now.size;
2297        remain.offset_within_address_space += now.size;
2298        remain.offset_within_region += now.size;
2299    }
2300    while (remain.size >= TARGET_PAGE_SIZE) {
2301        now = remain;
2302        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2303            now.size = TARGET_PAGE_SIZE;
2304            register_subpage(&now);
2305        } else {
2306            now.size &= TARGET_PAGE_MASK;
2307            register_multipage(&now);
2308        }
2309        remain.size -= now.size;
2310        remain.offset_within_address_space += now.size;
2311        remain.offset_within_region += now.size;
2312    }
2313    now = remain;
2314    if (now.size) {
2315        register_subpage(&now);
2316    }
2317}
2318
2319
2320void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2321{
2322    if (kvm_enabled())
2323        kvm_coalesce_mmio_region(addr, size);
2324}
2325
2326void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2327{
2328    if (kvm_enabled())
2329        kvm_uncoalesce_mmio_region(addr, size);
2330}
2331
2332void qemu_flush_coalesced_mmio_buffer(void)
2333{
2334    if (kvm_enabled())
2335        kvm_flush_coalesced_mmio_buffer();
2336}
2337
2338#if defined(__linux__) && !defined(TARGET_S390X)
2339
2340#include <sys/vfs.h>
2341
2342#define HUGETLBFS_MAGIC       0x958458f6
2343
2344static long gethugepagesize(const char *path)
2345{
2346    struct statfs fs;
2347    int ret;
2348
2349    do {
2350        ret = statfs(path, &fs);
2351    } while (ret != 0 && errno == EINTR);
2352
2353    if (ret != 0) {
2354        perror(path);
2355        return 0;
2356    }
2357
2358    if (fs.f_type != HUGETLBFS_MAGIC)
2359        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2360
2361    return fs.f_bsize;
2362}
2363
2364static void *file_ram_alloc(RAMBlock *block,
2365                            ram_addr_t memory,
2366                            const char *path)
2367{
2368    char *filename;
2369    void *area;
2370    int fd;
2371#ifdef MAP_POPULATE
2372    int flags;
2373#endif
2374    unsigned long hpagesize;
2375
2376    hpagesize = gethugepagesize(path);
2377    if (!hpagesize) {
2378        return NULL;
2379    }
2380
2381    if (memory < hpagesize) {
2382        return NULL;
2383    }
2384
2385    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2386        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2387        return NULL;
2388    }
2389
2390    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2391        return NULL;
2392    }
2393
2394    fd = mkstemp(filename);
2395    if (fd < 0) {
2396        perror("unable to create backing store for hugepages");
2397        free(filename);
2398        return NULL;
2399    }
2400    unlink(filename);
2401    free(filename);
2402
2403    memory = (memory+hpagesize-1) & ~(hpagesize-1);
2404
2405    /*
2406     * ftruncate is not supported by hugetlbfs in older
2407     * hosts, so don't bother bailing out on errors.
2408     * If anything goes wrong with it under other filesystems,
2409     * mmap will fail.
2410     */
2411    if (ftruncate(fd, memory))
2412        perror("ftruncate");
2413
2414#ifdef MAP_POPULATE
2415    /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2416     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2417     * to sidestep this quirk.
2418     */
2419    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2420    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2421#else
2422    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2423#endif
2424    if (area == MAP_FAILED) {
2425        perror("file_ram_alloc: can't mmap RAM pages");
2426        close(fd);
2427        return (NULL);
2428    }
2429    block->fd = fd;
2430    return area;
2431}
2432#endif
2433
2434static ram_addr_t find_ram_offset(ram_addr_t size)
2435{
2436    RAMBlock *block, *next_block;
2437    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2438
2439    if (QLIST_EMPTY(&ram_list.blocks))
2440        return 0;
2441
2442    QLIST_FOREACH(block, &ram_list.blocks, next) {
2443        ram_addr_t end, next = RAM_ADDR_MAX;
2444
2445        end = block->offset + block->length;
2446
2447        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2448            if (next_block->offset >= end) {
2449                next = MIN(next, next_block->offset);
2450            }
2451        }
2452        if (next - end >= size && next - end < mingap) {
2453            offset = end;
2454            mingap = next - end;
2455        }
2456    }
2457
2458    if (offset == RAM_ADDR_MAX) {
2459        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2460                (uint64_t)size);
2461        abort();
2462    }
2463
2464    return offset;
2465}
2466
2467static ram_addr_t last_ram_offset(void)
2468{
2469    RAMBlock *block;
2470    ram_addr_t last = 0;
2471
2472    QLIST_FOREACH(block, &ram_list.blocks, next)
2473        last = MAX(last, block->offset + block->length);
2474
2475    return last;
2476}
2477
2478static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2479{
2480    int ret;
2481    QemuOpts *machine_opts;
2482
2483    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2484    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2485    if (machine_opts &&
2486        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2487        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2488        if (ret) {
2489            perror("qemu_madvise");
2490            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2491                            "but dump_guest_core=off specified\n");
2492        }
2493    }
2494}
2495
2496void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2497{
2498    RAMBlock *new_block, *block;
2499
2500    new_block = NULL;
2501    QLIST_FOREACH(block, &ram_list.blocks, next) {
2502        if (block->offset == addr) {
2503            new_block = block;
2504            break;
2505        }
2506    }
2507    assert(new_block);
2508    assert(!new_block->idstr[0]);
2509
2510    if (dev) {
2511        char *id = qdev_get_dev_path(dev);
2512        if (id) {
2513            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2514            g_free(id);
2515        }
2516    }
2517    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2518
2519    QLIST_FOREACH(block, &ram_list.blocks, next) {
2520        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2521            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2522                    new_block->idstr);
2523            abort();
2524        }
2525    }
2526}
2527
2528ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2529                                   MemoryRegion *mr)
2530{
2531    RAMBlock *new_block;
2532
2533    size = TARGET_PAGE_ALIGN(size);
2534    new_block = g_malloc0(sizeof(*new_block));
2535
2536    new_block->mr = mr;
2537    new_block->offset = find_ram_offset(size);
2538    if (host) {
2539        new_block->host = host;
2540        new_block->flags |= RAM_PREALLOC_MASK;
2541    } else {
2542        if (mem_path) {
2543#if defined (__linux__) && !defined(TARGET_S390X)
2544            new_block->host = file_ram_alloc(new_block, size, mem_path);
2545            if (!new_block->host) {
2546                new_block->host = qemu_vmalloc(size);
2547                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2548            }
2549#else
2550            fprintf(stderr, "-mem-path option unsupported\n");
2551            exit(1);
2552#endif
2553        } else {
2554            if (xen_enabled()) {
2555                xen_ram_alloc(new_block->offset, size, mr);
2556            } else if (kvm_enabled()) {
2557                /* some s390/kvm configurations have special constraints */
2558                new_block->host = kvm_vmalloc(size);
2559            } else {
2560                new_block->host = qemu_vmalloc(size);
2561            }
2562            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2563        }
2564    }
2565    new_block->length = size;
2566
2567    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2568
2569    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2570                                       last_ram_offset() >> TARGET_PAGE_BITS);
2571    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2572           0, size >> TARGET_PAGE_BITS);
2573    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2574
2575    qemu_ram_setup_dump(new_block->host, size);
2576
2577    if (kvm_enabled())
2578        kvm_setup_guest_memory(new_block->host, size);
2579
2580    return new_block->offset;
2581}
2582
2583ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2584{
2585    return qemu_ram_alloc_from_ptr(size, NULL, mr);
2586}
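
    /*
     * Hedged usage sketch: boards and devices normally reach this code via
     * the memory API rather than calling qemu_ram_alloc() directly, e.g.
     * (the field names are illustrative):
     *
     *     memory_region_init_ram(&s->ram, "board.ram", ram_size);
     *     memory_region_add_subregion(get_system_memory(), 0, &s->ram);
     *
     * memory_region_init_ram() ends up in qemu_ram_alloc() above.
     */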
2587
2588void qemu_ram_free_from_ptr(ram_addr_t addr)
2589{
2590    RAMBlock *block;
2591
2592    QLIST_FOREACH(block, &ram_list.blocks, next) {
2593        if (addr == block->offset) {
2594            QLIST_REMOVE(block, next);
2595            g_free(block);
2596            return;
2597        }
2598    }
2599}
2600
2601void qemu_ram_free(ram_addr_t addr)
2602{
2603    RAMBlock *block;
2604
2605    QLIST_FOREACH(block, &ram_list.blocks, next) {
2606        if (addr == block->offset) {
2607            QLIST_REMOVE(block, next);
2608            if (block->flags & RAM_PREALLOC_MASK) {
2609                ;
2610            } else if (mem_path) {
2611#if defined (__linux__) && !defined(TARGET_S390X)
2612                if (block->fd) {
2613                    munmap(block->host, block->length);
2614                    close(block->fd);
2615                } else {
2616                    qemu_vfree(block->host);
2617                }
2618#else
2619                abort();
2620#endif
2621            } else {
2622#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2623                munmap(block->host, block->length);
2624#else
2625                if (xen_enabled()) {
2626                    xen_invalidate_map_cache_entry(block->host);
2627                } else {
2628                    qemu_vfree(block->host);
2629                }
2630#endif
2631            }
2632            g_free(block);
2633            return;
2634        }
2635    }
2636
2637}
2638
2639#ifndef _WIN32
2640void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2641{
2642    RAMBlock *block;
2643    ram_addr_t offset;
2644    int flags;
2645    void *area, *vaddr;
2646
2647    QLIST_FOREACH(block, &ram_list.blocks, next) {
2648        offset = addr - block->offset;
2649        if (offset < block->length) {
2650            vaddr = block->host + offset;
2651            if (block->flags & RAM_PREALLOC_MASK) {
2652                ;
2653            } else {
2654                flags = MAP_FIXED;
2655                munmap(vaddr, length);
2656                if (mem_path) {
2657#if defined(__linux__) && !defined(TARGET_S390X)
2658                    if (block->fd) {
2659#ifdef MAP_POPULATE
2660                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2661                            MAP_PRIVATE;
2662#else
2663                        flags |= MAP_PRIVATE;
2664#endif
2665                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2666                                    flags, block->fd, offset);
2667                    } else {
2668                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2669                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2670                                    flags, -1, 0);
2671                    }
2672#else
2673                    abort();
2674#endif
2675                } else {
2676#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2677                    flags |= MAP_SHARED | MAP_ANONYMOUS;
2678                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2679                                flags, -1, 0);
2680#else
2681                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2682                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2683                                flags, -1, 0);
2684#endif
2685                }
2686                if (area != vaddr) {
2687                    fprintf(stderr, "Could not remap addr: "
2688                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2689                            length, addr);
2690                    exit(1);
2691                }
2692                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2693                qemu_ram_setup_dump(vaddr, length);
2694            }
2695            return;
2696        }
2697    }
2698}
2699#endif /* !_WIN32 */
2700
2701/* Return a host pointer to ram allocated with qemu_ram_alloc.
2702   With the exception of the softmmu code in this file, this should
2703   only be used for local memory (e.g. video ram) that the device owns,
2704   and knows it isn't going to access beyond the end of the block.
2705
2706   It should not be used for general purpose DMA.
2707   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2708 */
2709void *qemu_get_ram_ptr(ram_addr_t addr)
2710{
2711    RAMBlock *block;
2712
2713    QLIST_FOREACH(block, &ram_list.blocks, next) {
2714        if (addr - block->offset < block->length) {
2715            /* Move this entry to the start of the list.  */
2716            if (block != QLIST_FIRST(&ram_list.blocks)) {
2717                QLIST_REMOVE(block, next);
2718                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2719            }
2720            if (xen_enabled()) {
2721                /* We need to check if the requested address is in the RAM
2722                 * because we don't want to map the entire memory in QEMU.
2723                 * In that case just map until the end of the page.
2724                 */
2725                if (block->offset == 0) {
2726                    return xen_map_cache(addr, 0, 0);
2727                } else if (block->host == NULL) {
2728                    block->host =
2729                        xen_map_cache(block->offset, block->length, 1);
2730                }
2731            }
2732            return block->host + (addr - block->offset);
2733        }
2734    }
2735
2736    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2737    abort();
2738
2739    return NULL;
2740}
2741
2742/* Return a host pointer to ram allocated with qemu_ram_alloc.
2743 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2744 */
2745void *qemu_safe_ram_ptr(ram_addr_t addr)
2746{
2747    RAMBlock *block;
2748
2749    QLIST_FOREACH(block, &ram_list.blocks, next) {
2750        if (addr - block->offset < block->length) {
2751            if (xen_enabled()) {
2752                /* We need to check if the requested address is in the RAM
2753                 * because we don't want to map the entire memory in QEMU.
2754                 * In that case just map until the end of the page.
2755                 */
2756                if (block->offset == 0) {
2757                    return xen_map_cache(addr, 0, 0);
2758                } else if (block->host == NULL) {
2759                    block->host =
2760                        xen_map_cache(block->offset, block->length, 1);
2761                }
2762            }
2763            return block->host + (addr - block->offset);
2764        }
2765    }
2766
2767    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2768    abort();
2769
2770    return NULL;
2771}
2772
2773/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2774 * but takes a size argument */
2775void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2776{
2777    if (*size == 0) {
2778        return NULL;
2779    }
2780    if (xen_enabled()) {
2781        return xen_map_cache(addr, *size, 1);
2782    } else {
2783        RAMBlock *block;
2784
2785        QLIST_FOREACH(block, &ram_list.blocks, next) {
2786            if (addr - block->offset < block->length) {
2787                if (addr - block->offset + *size > block->length)
2788                    *size = block->length - addr + block->offset;
2789                return block->host + (addr - block->offset);
2790            }
2791        }
2792
2793        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2794        abort();
2795    }
2796}
2797
2798void qemu_put_ram_ptr(void *addr)
2799{
2800    trace_qemu_put_ram_ptr(addr);
2801}
2802
2803int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2804{
2805    RAMBlock *block;
2806    uint8_t *host = ptr;
2807
2808    if (xen_enabled()) {
2809        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2810        return 0;
2811    }
2812
2813    QLIST_FOREACH(block, &ram_list.blocks, next) {
2814        /* This case happens when the block is not mapped. */
2815        if (block->host == NULL) {
2816            continue;
2817        }
2818        if (host - block->host < block->length) {
2819            *ram_addr = block->offset + (host - block->host);
2820            return 0;
2821        }
2822    }
2823
2824    return -1;
2825}
2826
2827/* Some of the softmmu routines need to translate from a host pointer
2828   (typically a TLB entry) back to a ram offset.  */
2829ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2830{
2831    ram_addr_t ram_addr;
2832
2833    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2834        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2835        abort();
2836    }
2837    return ram_addr;
2838}
2839
2840static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2841                                    unsigned size)
2842{
2843#ifdef DEBUG_UNASSIGNED
2844    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2845#endif
2846#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2847    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2848#endif
2849    return 0;
2850}
2851
2852static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2853                                 uint64_t val, unsigned size)
2854{
2855#ifdef DEBUG_UNASSIGNED
2856    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2857#endif
2858#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2859    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2860#endif
2861}
2862
2863static const MemoryRegionOps unassigned_mem_ops = {
2864    .read = unassigned_mem_read,
2865    .write = unassigned_mem_write,
2866    .endianness = DEVICE_NATIVE_ENDIAN,
2867};
2868
2869static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2870                               unsigned size)
2871{
2872    abort();
2873}
2874
2875static void error_mem_write(void *opaque, target_phys_addr_t addr,
2876                            uint64_t value, unsigned size)
2877{
2878    abort();
2879}
2880
2881static const MemoryRegionOps error_mem_ops = {
2882    .read = error_mem_read,
2883    .write = error_mem_write,
2884    .endianness = DEVICE_NATIVE_ENDIAN,
2885};
2886
2887static const MemoryRegionOps rom_mem_ops = {
2888    .read = error_mem_read,
2889    .write = unassigned_mem_write,
2890    .endianness = DEVICE_NATIVE_ENDIAN,
2891};
2892
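    /*
     * Writes reach this slow path instead of going straight to RAM while
     * not all of the page's dirty bits are set, typically because code has
     * been translated from it or because dirty logging cleared them.  The
     * handler invalidates any TBs for the page, performs the store, marks
     * the page dirty again and, once no translated code remains
     * (dirty_flags == 0xff), re-enables the fast RAM path via
     * tlb_set_dirty().
     */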
2893static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2894                               uint64_t val, unsigned size)
2895{
2896    int dirty_flags;
2897    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2898    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2899#if !defined(CONFIG_USER_ONLY)
2900        tb_invalidate_phys_page_fast(ram_addr, size);
2901        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2902#endif
2903    }
2904    switch (size) {
2905    case 1:
2906        stb_p(qemu_get_ram_ptr(ram_addr), val);
2907        break;
2908    case 2:
2909        stw_p(qemu_get_ram_ptr(ram_addr), val);
2910        break;
2911    case 4:
2912        stl_p(qemu_get_ram_ptr(ram_addr), val);
2913        break;
2914    default:
2915        abort();
2916    }
2917    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2918    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2919    /* we remove the notdirty callback only if the code has been
2920       flushed */
2921    if (dirty_flags == 0xff)
2922        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2923}
2924
2925static const MemoryRegionOps notdirty_mem_ops = {
2926    .read = error_mem_read,
2927    .write = notdirty_mem_write,
2928    .endianness = DEVICE_NATIVE_ENDIAN,
2929};
2930
2931/* Generate a debug exception if a watchpoint has been hit.  */
2932static void check_watchpoint(int offset, int len_mask, int flags)
2933{
2934    CPUArchState *env = cpu_single_env;
2935    target_ulong pc, cs_base;
2936    TranslationBlock *tb;
2937    target_ulong vaddr;
2938    CPUWatchpoint *wp;
2939    int cpu_flags;
2940
2941    if (env->watchpoint_hit) {
2942        /* We re-entered the check after replacing the TB. Now raise
2943         * the debug interrupt so that it will trigger after the
2944         * current instruction. */
2945        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2946        return;
2947    }
2948    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2949    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2950        if ((vaddr == (wp->vaddr & len_mask) ||
2951             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2952            wp->flags |= BP_WATCHPOINT_HIT;
2953            if (!env->watchpoint_hit) {
2954                env->watchpoint_hit = wp;
2955                tb = tb_find_pc(env->mem_io_pc);
2956                if (!tb) {
2957                    cpu_abort(env, "check_watchpoint: could not find TB for "
2958                              "pc=%p", (void *)env->mem_io_pc);
2959                }
2960                cpu_restore_state(tb, env, env->mem_io_pc);
2961                tb_phys_invalidate(tb, -1);
2962                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2963                    env->exception_index = EXCP_DEBUG;
2964                    cpu_loop_exit(env);
2965                } else {
2966                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2967                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2968                    cpu_resume_from_signal(env, NULL);
2969                }
2970            }
2971        } else {
2972            wp->flags &= ~BP_WATCHPOINT_HIT;
2973        }
2974    }
2975}
2976
2977/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2978   so these check for a hit then pass through to the normal out-of-line
2979   phys routines.  */
2980static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2981                               unsigned size)
2982{
2983    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2984    switch (size) {
2985    case 1: return ldub_phys(addr);
2986    case 2: return lduw_phys(addr);
2987    case 4: return ldl_phys(addr);
2988    default: abort();
2989    }
2990}
2991
2992static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2993                            uint64_t val, unsigned size)
2994{
2995    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2996    switch (size) {
2997    case 1:
2998        stb_phys(addr, val);
2999        break;
3000    case 2:
3001        stw_phys(addr, val);
3002        break;
3003    case 4:
3004        stl_phys(addr, val);
3005        break;
3006    default: abort();
3007    }
3008}
3009
3010static const MemoryRegionOps watch_mem_ops = {
3011    .read = watch_mem_read,
3012    .write = watch_mem_write,
3013    .endianness = DEVICE_NATIVE_ENDIAN,
3014};
3015
3016static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3017                             unsigned len)
3018{
3019    subpage_t *mmio = opaque;
3020    unsigned int idx = SUBPAGE_IDX(addr);
3021    MemoryRegionSection *section;
3022#if defined(DEBUG_SUBPAGE)
3023    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3024           mmio, len, addr, idx);
3025#endif
3026
3027    section = &phys_sections[mmio->sub_section[idx]];
3028    addr += mmio->base;
3029    addr -= section->offset_within_address_space;
3030    addr += section->offset_within_region;
3031    return io_mem_read(section->mr, addr, len);
3032}
3033
3034static void subpage_write(void *opaque, target_phys_addr_t addr,
3035                          uint64_t value, unsigned len)
3036{
3037    subpage_t *mmio = opaque;
3038    unsigned int idx = SUBPAGE_IDX(addr);
3039    MemoryRegionSection *section;
3040#if defined(DEBUG_SUBPAGE)
3041    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3042           " idx %d value %"PRIx64"\n",
3043           __func__, mmio, len, addr, idx, value);
3044#endif
3045
3046    section = &phys_sections[mmio->sub_section[idx]];
3047    addr += mmio->base;
3048    addr -= section->offset_within_address_space;
3049    addr += section->offset_within_region;
3050    io_mem_write(section->mr, addr, value, len);
3051}
3052
3053static const MemoryRegionOps subpage_ops = {
3054    .read = subpage_read,
3055    .write = subpage_write,
3056    .endianness = DEVICE_NATIVE_ENDIAN,
3057};
3058
3059static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3060                                 unsigned size)
3061{
3062    ram_addr_t raddr = addr;
3063    void *ptr = qemu_get_ram_ptr(raddr);
3064    switch (size) {
3065    case 1: return ldub_p(ptr);
3066    case 2: return lduw_p(ptr);
3067    case 4: return ldl_p(ptr);
3068    default: abort();
3069    }
3070}
3071
3072static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3073                              uint64_t value, unsigned size)
3074{
3075    ram_addr_t raddr = addr;
3076    void *ptr = qemu_get_ram_ptr(raddr);
3077    switch (size) {
3078    case 1: return stb_p(ptr, value);
3079    case 2: return stw_p(ptr, value);
3080    case 4: return stl_p(ptr, value);
3081    default: abort();
3082    }
3083}
3084
3085static const MemoryRegionOps subpage_ram_ops = {
3086    .read = subpage_ram_read,
3087    .write = subpage_ram_write,
3088    .endianness = DEVICE_NATIVE_ENDIAN,
3089};
3090
3091static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3092                             uint16_t section)
3093{
3094    int idx, eidx;
3095
3096    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3097        return -1;
3098    idx = SUBPAGE_IDX(start);
3099    eidx = SUBPAGE_IDX(end);
3100#if defined(DEBUG_SUBPAGE)
3101    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3102           mmio, start, end, idx, eidx, memory);
3103#endif
3104    if (memory_region_is_ram(phys_sections[section].mr)) {
3105        MemoryRegionSection new_section = phys_sections[section];
3106        new_section.mr = &io_mem_subpage_ram;
3107        section = phys_section_add(&new_section);
3108    }
3109    for (; idx <= eidx; idx++) {
3110        mmio->sub_section[idx] = section;
3111    }
3112
3113    return 0;
3114}
3115
3116static subpage_t *subpage_init(target_phys_addr_t base)
3117{
3118    subpage_t *mmio;
3119
3120    mmio = g_malloc0(sizeof(subpage_t));
3121
3122    mmio->base = base;
3123    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3124                          "subpage", TARGET_PAGE_SIZE);
3125    mmio->iomem.subpage = true;
3126#if defined(DEBUG_SUBPAGE)
3127    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3128           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3129#endif
3130    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3131
3132    return mmio;
3133}
3134
3135static uint16_t dummy_section(MemoryRegion *mr)
3136{
3137    MemoryRegionSection section = {
3138        .mr = mr,
3139        .offset_within_address_space = 0,
3140        .offset_within_region = 0,
3141        .size = UINT64_MAX,
3142    };
3143
3144    return phys_section_add(&section);
3145}
3146
3147MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3148{
3149    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3150}
3151
3152static void io_mem_init(void)
3153{
3154    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3155    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3156    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3157                          "unassigned", UINT64_MAX);
3158    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3159                          "notdirty", UINT64_MAX);
3160    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3161                          "subpage-ram", UINT64_MAX);
3162    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3163                          "watch", UINT64_MAX);
3164}
3165
3166static void core_begin(MemoryListener *listener)
3167{
3168    destroy_all_mappings();
3169    phys_sections_clear();
3170    phys_map.ptr = PHYS_MAP_NODE_NIL;
3171    phys_section_unassigned = dummy_section(&io_mem_unassigned);
3172    phys_section_notdirty = dummy_section(&io_mem_notdirty);
3173    phys_section_rom = dummy_section(&io_mem_rom);
3174    phys_section_watch = dummy_section(&io_mem_watch);
3175}
3176
3177static void core_commit(MemoryListener *listener)
3178{
3179    CPUArchState *env;
3180
3181    /* since each CPU stores ram addresses in its TLB cache, we must
3182       reset the modified entries */
3183    /* XXX: slow ! */
3184    for(env = first_cpu; env != NULL; env = env->next_cpu) {
3185        tlb_flush(env, 1);
3186    }
3187}
3188
3189static void core_region_add(MemoryListener *listener,
3190                            MemoryRegionSection *section)
3191{
3192    cpu_register_physical_memory_log(section, section->readonly);
3193}
3194
3195static void core_region_del(MemoryListener *listener,
3196                            MemoryRegionSection *section)
3197{
3198}
3199
3200static void core_region_nop(MemoryListener *listener,
3201                            MemoryRegionSection *section)
3202{
3203    cpu_register_physical_memory_log(section, section->readonly);
3204}
3205
3206static void core_log_start(MemoryListener *listener,
3207                           MemoryRegionSection *section)
3208{
3209}
3210
3211static void core_log_stop(MemoryListener *listener,
3212                          MemoryRegionSection *section)
3213{
3214}
3215
3216static void core_log_sync(MemoryListener *listener,
3217                          MemoryRegionSection *section)
3218{
3219}
3220
3221static void core_log_global_start(MemoryListener *listener)
3222{
3223    cpu_physical_memory_set_dirty_tracking(1);
3224}
3225
3226static void core_log_global_stop(MemoryListener *listener)
3227{
3228    cpu_physical_memory_set_dirty_tracking(0);
3229}
3230
3231static void core_eventfd_add(MemoryListener *listener,
3232                             MemoryRegionSection *section,
3233                             bool match_data, uint64_t data, EventNotifier *e)
3234{
3235}
3236
3237static void core_eventfd_del(MemoryListener *listener,
3238                             MemoryRegionSection *section,
3239                             bool match_data, uint64_t data, EventNotifier *e)
3240{
3241}
3242
3243static void io_begin(MemoryListener *listener)
3244{
3245}
3246
3247static void io_commit(MemoryListener *listener)
3248{
3249}
3250
3251static void io_region_add(MemoryListener *listener,
3252                          MemoryRegionSection *section)
3253{
3254    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3255
3256    mrio->mr = section->mr;
3257    mrio->offset = section->offset_within_region;
3258    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3259                 section->offset_within_address_space, section->size);
3260    ioport_register(&mrio->iorange);
3261}
3262
3263static void io_region_del(MemoryListener *listener,
3264                          MemoryRegionSection *section)
3265{
3266    isa_unassign_ioport(section->offset_within_address_space, section->size);
3267}
3268
3269static void io_region_nop(MemoryListener *listener,
3270                          MemoryRegionSection *section)
3271{
3272}
3273
3274static void io_log_start(MemoryListener *listener,
3275                         MemoryRegionSection *section)
3276{
3277}
3278
3279static void io_log_stop(MemoryListener *listener,
3280                        MemoryRegionSection *section)
3281{
3282}
3283
3284static void io_log_sync(MemoryListener *listener,
3285                        MemoryRegionSection *section)
3286{
3287}
3288
3289static void io_log_global_start(MemoryListener *listener)
3290{
3291}
3292
3293static void io_log_global_stop(MemoryListener *listener)
3294{
3295}
3296
3297static void io_eventfd_add(MemoryListener *listener,
3298                           MemoryRegionSection *section,
3299                           bool match_data, uint64_t data, EventNotifier *e)
3300{
3301}
3302
3303static void io_eventfd_del(MemoryListener *listener,
3304                           MemoryRegionSection *section,
3305                           bool match_data, uint64_t data, EventNotifier *e)
3306{
3307}
3308
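    /*
     * The core listener rebuilds the physical page table on every topology
     * change: core_begin() discards the old tree, region_add()/region_nop()
     * re-register each section, and core_commit() flushes every CPU's TLB
     * so stale mappings cannot be used.  The io listener below mirrors the
     * "io" address space into the legacy ioport tables; its remaining
     * callbacks are intentionally empty.
     */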
3309static MemoryListener core_memory_listener = {
3310    .begin = core_begin,
3311    .commit = core_commit,
3312    .region_add = core_region_add,
3313    .region_del = core_region_del,
3314    .region_nop = core_region_nop,
3315    .log_start = core_log_start,
3316    .log_stop = core_log_stop,
3317    .log_sync = core_log_sync,
3318    .log_global_start = core_log_global_start,
3319    .log_global_stop = core_log_global_stop,
3320    .eventfd_add = core_eventfd_add,
3321    .eventfd_del = core_eventfd_del,
3322    .priority = 0,
3323};
3324
3325static MemoryListener io_memory_listener = {
3326    .begin = io_begin,
3327    .commit = io_commit,
3328    .region_add = io_region_add,
3329    .region_del = io_region_del,
3330    .region_nop = io_region_nop,
3331    .log_start = io_log_start,
3332    .log_stop = io_log_stop,
3333    .log_sync = io_log_sync,
3334    .log_global_start = io_log_global_start,
3335    .log_global_stop = io_log_global_stop,
3336    .eventfd_add = io_eventfd_add,
3337    .eventfd_del = io_eventfd_del,
3338    .priority = 0,
3339};
3340
3341static void memory_map_init(void)
3342{
3343    system_memory = g_malloc(sizeof(*system_memory));
3344    memory_region_init(system_memory, "system", INT64_MAX);
3345    set_system_memory_map(system_memory);
3346
3347    system_io = g_malloc(sizeof(*system_io));
3348    memory_region_init(system_io, "io", 65536);
3349    set_system_io_map(system_io);
3350
3351    memory_listener_register(&core_memory_listener, system_memory);
3352    memory_listener_register(&io_memory_listener, system_io);
3353}
3354
3355MemoryRegion *get_system_memory(void)
3356{
3357    return system_memory;
3358}
3359
3360MemoryRegion *get_system_io(void)
3361{
3362    return system_io;
3363}
3364
3365#endif /* !defined(CONFIG_USER_ONLY) */
3366
3367/* physical memory access (slow version, mainly for debug) */
3368#if defined(CONFIG_USER_ONLY)
3369int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3370                        uint8_t *buf, int len, int is_write)
3371{
3372    int l, flags;
3373    target_ulong page;
3374    void * p;
3375
3376    while (len > 0) {
3377        page = addr & TARGET_PAGE_MASK;
3378        l = (page + TARGET_PAGE_SIZE) - addr;
3379        if (l > len)
3380            l = len;
3381        flags = page_get_flags(page);
3382        if (!(flags & PAGE_VALID))
3383            return -1;
3384        if (is_write) {
3385            if (!(flags & PAGE_WRITE))
3386                return -1;
3387            /* XXX: this code should not depend on lock_user */
3388            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3389                return -1;
3390            memcpy(p, buf, l);
3391            unlock_user(p, addr, l);
3392        } else {
3393            if (!(flags & PAGE_READ))
3394                return -1;
3395            /* XXX: this code should not depend on lock_user */
3396            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3397                return -1;
3398            memcpy(buf, p, l);
3399            unlock_user(p, addr, 0);
3400        }
3401        len -= l;
3402        buf += l;
3403        addr += l;
3404    }
3405    return 0;
3406}
3407
3408#else
3409void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3410                            int len, int is_write)
3411{
3412    int l;
3413    uint8_t *ptr;
3414    uint32_t val;
3415    target_phys_addr_t page;
3416    MemoryRegionSection *section;
3417
3418    while (len > 0) {
3419        page = addr & TARGET_PAGE_MASK;
3420        l = (page + TARGET_PAGE_SIZE) - addr;
3421        if (l > len)
3422            l = len;
3423        section = phys_page_find(page >> TARGET_PAGE_BITS);
3424
3425        if (is_write) {
3426            if (!memory_region_is_ram(section->mr)) {
3427                target_phys_addr_t addr1;
3428                addr1 = memory_region_section_addr(section, addr);
3429                /* XXX: could force cpu_single_env to NULL to avoid
3430                   potential bugs */
3431                if (l >= 4 && ((addr1 & 3) == 0)) {
3432                    /* 32 bit write access */
3433                    val = ldl_p(buf);
3434                    io_mem_write(section->mr, addr1, val, 4);
3435                    l = 4;
3436                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3437                    /* 16 bit write access */
3438                    val = lduw_p(buf);
3439                    io_mem_write(section->mr, addr1, val, 2);
3440                    l = 2;
3441                } else {
3442                    /* 8 bit write access */
3443                    val = ldub_p(buf);
3444                    io_mem_write(section->mr, addr1, val, 1);
3445                    l = 1;
3446                }
3447            } else if (!section->readonly) {
3448                ram_addr_t addr1;
3449                addr1 = memory_region_get_ram_addr(section->mr)
3450                    + memory_region_section_addr(section, addr);
3451                /* RAM case */
3452                ptr = qemu_get_ram_ptr(addr1);
3453                memcpy(ptr, buf, l);
3454                if (!cpu_physical_memory_is_dirty(addr1)) {
3455                    /* invalidate code */
3456                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3457                    /* set dirty bit */
3458                    cpu_physical_memory_set_dirty_flags(
3459                        addr1, (0xff & ~CODE_DIRTY_FLAG));
3460                }
3461                qemu_put_ram_ptr(ptr);
3462            }
3463        } else {
3464            if (!(memory_region_is_ram(section->mr) ||
3465                  memory_region_is_romd(section->mr))) {
3466                target_phys_addr_t addr1;
3467                /* I/O case */
3468                addr1 = memory_region_section_addr(section, addr);
3469                if (l >= 4 && ((addr1 & 3) == 0)) {
3470                    /* 32 bit read access */
3471                    val = io_mem_read(section->mr, addr1, 4);
3472                    stl_p(buf, val);
3473                    l = 4;
3474                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3475                    /* 16 bit read access */
3476                    val = io_mem_read(section->mr, addr1, 2);
3477                    stw_p(buf, val);
3478                    l = 2;
3479                } else {
3480                    /* 8 bit read access */
3481                    val = io_mem_read(section->mr, addr1, 1);
3482                    stb_p(buf, val);
3483                    l = 1;
3484                }
3485            } else {
3486                /* RAM case */
3487                ptr = qemu_get_ram_ptr(section->mr->ram_addr
3488                                       + memory_region_section_addr(section,
3489                                                                    addr));
3490                memcpy(buf, ptr, l);
3491                qemu_put_ram_ptr(ptr);
3492            }
3493        }
3494        len -= l;
3495        buf += l;
3496        addr += l;
3497    }
3498}
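
/* Note: illustrative sketch only, not part of the original file.  Callers
 * normally use the cpu_physical_memory_read()/cpu_physical_memory_write()
 * convenience wrappers (declared elsewhere in the tree), which fix
 * is_write to 0 or 1:
 *
 *     uint8_t desc[16];
 *     cpu_physical_memory_read(desc_addr, desc, sizeof(desc));
 *     ...
 *     cpu_physical_memory_write(desc_addr, desc, sizeof(desc));
 *
 * 'desc' and 'desc_addr' are hypothetical names used only for the example.
 */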
3499
3500/* Used for ROM loading: can write to RAM as well as ROM. */
3501void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3502                                   const uint8_t *buf, int len)
3503{
3504    int l;
3505    uint8_t *ptr;
3506    target_phys_addr_t page;
3507    MemoryRegionSection *section;
3508
3509    while (len > 0) {
3510        page = addr & TARGET_PAGE_MASK;
3511        l = (page + TARGET_PAGE_SIZE) - addr;
3512        if (l > len)
3513            l = len;
3514        section = phys_page_find(page >> TARGET_PAGE_BITS);
3515
3516        if (!(memory_region_is_ram(section->mr) ||
3517              memory_region_is_romd(section->mr))) {
3518            /* do nothing */
3519        } else {
3520            unsigned long addr1;
3521            addr1 = memory_region_get_ram_addr(section->mr)
3522                + memory_region_section_addr(section, addr);
3523            /* ROM/RAM case */
3524            ptr = qemu_get_ram_ptr(addr1);
3525            memcpy(ptr, buf, l);
3526            qemu_put_ram_ptr(ptr);
3527        }
3528        len -= l;
3529        buf += l;
3530        addr += l;
3531    }
3532}
3533
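/* cpu_physical_memory_map() falls back to this single, global bounce
   buffer when the target is not directly mappable RAM.  Only one bounce
   mapping can be outstanding at a time (tracked via bounce.buffer);
   further attempts fail until cpu_physical_memory_unmap() releases it
   and notifies any registered map clients. */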
3534typedef struct {
3535    void *buffer;
3536    target_phys_addr_t addr;
3537    target_phys_addr_t len;
3538} BounceBuffer;
3539
3540static BounceBuffer bounce;
3541
3542typedef struct MapClient {
3543    void *opaque;
3544    void (*callback)(void *opaque);
3545    QLIST_ENTRY(MapClient) link;
3546} MapClient;
3547
3548static QLIST_HEAD(map_client_list, MapClient) map_client_list
3549    = QLIST_HEAD_INITIALIZER(map_client_list);
3550
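/* Register a callback to be invoked when a failed map attempt is worth
   retrying (i.e. when the bounce buffer is released).  The callback is
   run once and then automatically unregistered; the returned handle can
   be passed to cpu_unregister_map_client() to cancel it earlier. */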
3551void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3552{
3553    MapClient *client = g_malloc(sizeof(*client));
3554
3555    client->opaque = opaque;
3556    client->callback = callback;
3557    QLIST_INSERT_HEAD(&map_client_list, client, link);
3558    return client;
3559}
3560
3561void cpu_unregister_map_client(void *_client)
3562{
3563    MapClient *client = (MapClient *)_client;
3564
3565    QLIST_REMOVE(client, link);
3566    g_free(client);
3567}
3568
3569static void cpu_notify_map_clients(void)
3570{
3571    MapClient *client;
3572
3573    while (!QLIST_EMPTY(&map_client_list)) {
3574        client = QLIST_FIRST(&map_client_list);
3575        client->callback(client->opaque);
3576        cpu_unregister_map_client(client);
3577    }
3578}
3579
3580/* Map a physical memory region into a host virtual address.
3581 * May map a subset of the requested range, given by and returned in *plen.
3582 * May return NULL if resources needed to perform the mapping are exhausted.
3583 * Use only for reads OR writes - not for read-modify-write operations.
3584 * Use cpu_register_map_client() to know when retrying the map operation is
3585 * likely to succeed.
3586 */
3587void *cpu_physical_memory_map(target_phys_addr_t addr,
3588                              target_phys_addr_t *plen,
3589                              int is_write)
3590{
3591    target_phys_addr_t len = *plen;
3592    target_phys_addr_t todo = 0;
3593    int l;
3594    target_phys_addr_t page;
3595    MemoryRegionSection *section;
3596    ram_addr_t raddr = RAM_ADDR_MAX;
3597    ram_addr_t rlen;
3598    void *ret;
3599
3600    while (len > 0) {
3601        page = addr & TARGET_PAGE_MASK;
3602        l = (page + TARGET_PAGE_SIZE) - addr;
3603        if (l > len)
3604            l = len;
3605        section = phys_page_find(page >> TARGET_PAGE_BITS);
3606
3607        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3608            if (todo || bounce.buffer) {
3609                break;
3610            }
3611            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3612            bounce.addr = addr;
3613            bounce.len = l;
3614            if (!is_write) {
3615                cpu_physical_memory_read(addr, bounce.buffer, l);
3616            }
3617
3618            *plen = l;
3619            return bounce.buffer;
3620        }
3621        if (!todo) {
3622            raddr = memory_region_get_ram_addr(section->mr)
3623                + memory_region_section_addr(section, addr);
3624        }
3625
3626        len -= l;
3627        addr += l;
3628        todo += l;
3629    }
3630    rlen = todo;
3631    ret = qemu_ram_ptr_length(raddr, &rlen);
3632    *plen = rlen;
3633    return ret;
3634}
3635
3636/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3637 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3638 * the amount of memory that was actually read or written by the caller.
3639 */
3640void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3641                               int is_write, target_phys_addr_t access_len)
3642{
3643    if (buffer != bounce.buffer) {
3644        if (is_write) {
3645            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3646            while (access_len) {
3647                unsigned l;
3648                l = TARGET_PAGE_SIZE;
3649                if (l > access_len)
3650                    l = access_len;
3651                if (!cpu_physical_memory_is_dirty(addr1)) {
3652                    /* invalidate code */
3653                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3654                    /* set dirty bit */
3655                    cpu_physical_memory_set_dirty_flags(
3656                        addr1, (0xff & ~CODE_DIRTY_FLAG));
3657                }
3658                addr1 += l;
3659                access_len -= l;
3660            }
3661        }
3662        if (xen_enabled()) {
3663            xen_invalidate_map_cache_entry(buffer);
3664        }
3665        return;
3666    }
3667    if (is_write) {
3668        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3669    }
3670    qemu_vfree(bounce.buffer);
3671    bounce.buffer = NULL;
3672    cpu_notify_map_clients();
3673}
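
/* Illustrative only: a sketch of the usual map/unmap pattern, not code
 * from the original file:
 *
 *     target_phys_addr_t plen = len;
 *     void *p = cpu_physical_memory_map(addr, &plen, is_write);
 *     if (p) {
 *         ... read or write up to plen bytes at p ...
 *         cpu_physical_memory_unmap(p, plen, is_write, plen);
 *     } else {
 *         ... resources exhausted: cpu_register_map_client() reports
 *             when a retry is likely to succeed ...
 *     }
 *
 * Passing the actually accessed length as access_len lets unmap limit
 * dirty tracking to the bytes that were really written.
 */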
3674
3675/* warning: addr must be aligned */
3676static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3677                                         enum device_endian endian)
3678{
3679    uint8_t *ptr;
3680    uint32_t val;
3681    MemoryRegionSection *section;
3682
3683    section = phys_page_find(addr >> TARGET_PAGE_BITS);
3684
3685    if (!(memory_region_is_ram(section->mr) ||
3686          memory_region_is_romd(section->mr))) {
3687        /* I/O case */
3688        addr = memory_region_section_addr(section, addr);
3689        val = io_mem_read(section->mr, addr, 4);
3690#if defined(TARGET_WORDS_BIGENDIAN)
3691        if (endian == DEVICE_LITTLE_ENDIAN) {
3692            val = bswap32(val);
3693        }
3694#else
3695        if (endian == DEVICE_BIG_ENDIAN) {
3696            val = bswap32(val);
3697        }
3698#endif
3699    } else {
3700        /* RAM case */
3701        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3702                                & TARGET_PAGE_MASK)
3703                               + memory_region_section_addr(section, addr));
3704        switch (endian) {
3705        case DEVICE_LITTLE_ENDIAN:
3706            val = ldl_le_p(ptr);
3707            break;
3708        case DEVICE_BIG_ENDIAN:
3709            val = ldl_be_p(ptr);
3710            break;
3711        default:
3712            val = ldl_p(ptr);
3713            break;
3714        }
3715    }
3716    return val;
3717}
3718
3719uint32_t ldl_phys(target_phys_addr_t addr)
3720{
3721    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3722}
3723
3724uint32_t ldl_le_phys(target_phys_addr_t addr)
3725{
3726    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3727}
3728
3729uint32_t ldl_be_phys(target_phys_addr_t addr)
3730{
3731    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3732}
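
/* The _le/_be variants force the device byte order explicitly; the plain
   versions use DEVICE_NATIVE_ENDIAN, i.e. the target CPU's byte order.
   The same convention applies to every ld*_phys/st*_phys helper below. */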
3733
3734/* warning: addr must be aligned */
3735static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3736                                         enum device_endian endian)
3737{
3738    uint8_t *ptr;
3739    uint64_t val;
3740    MemoryRegionSection *section;
3741
3742    section = phys_page_find(addr >> TARGET_PAGE_BITS);
3743
3744    if (!(memory_region_is_ram(section->mr) ||
3745          memory_region_is_romd(section->mr))) {
3746        /* I/O case */
3747        addr = memory_region_section_addr(section, addr);
3748
3749        /* XXX: This is broken when the device endianness differs from
3750           the CPU endianness.  Fix it and honour the "endian" argument. */
3751#ifdef TARGET_WORDS_BIGENDIAN
3752        val = io_mem_read(section->mr, addr, 4) << 32;
3753        val |= io_mem_read(section->mr, addr + 4, 4);
3754#else
3755        val = io_mem_read(section->mr, addr, 4);
3756        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3757#endif
3758    } else {
3759        /* RAM case */
3760        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3761                                & TARGET_PAGE_MASK)
3762                               + memory_region_section_addr(section, addr));
3763        switch (endian) {
3764        case DEVICE_LITTLE_ENDIAN:
3765            val = ldq_le_p(ptr);
3766            break;
3767        case DEVICE_BIG_ENDIAN:
3768            val = ldq_be_p(ptr);
3769            break;
3770        default:
3771            val = ldq_p(ptr);
3772            break;
3773        }
3774    }
3775    return val;
3776}
3777
3778uint64_t ldq_phys(target_phys_addr_t addr)
3779{
3780    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3781}
3782
3783uint64_t ldq_le_phys(target_phys_addr_t addr)
3784{
3785    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3786}
3787
3788uint64_t ldq_be_phys(target_phys_addr_t addr)
3789{
3790    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3791}
3792
3793/* XXX: optimize */
3794uint32_t ldub_phys(target_phys_addr_t addr)
3795{
3796    uint8_t val;
3797    cpu_physical_memory_read(addr, &val, 1);
3798    return val;
3799}
3800
3801/* warning: addr must be aligned */
3802static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3803                                          enum device_endian endian)
3804{
3805    uint8_t *ptr;
3806    uint64_t val;
3807    MemoryRegionSection *section;
3808
3809    section = phys_page_find(addr >> TARGET_PAGE_BITS);
3810
3811    if (!(memory_region_is_ram(section->mr) ||
3812          memory_region_is_romd(section->mr))) {
3813        /* I/O case */
3814        addr = memory_region_section_addr(section, addr);
3815        val = io_mem_read(section->mr, addr, 2);
3816#if defined(TARGET_WORDS_BIGENDIAN)
3817        if (endian == DEVICE_LITTLE_ENDIAN) {
3818            val = bswap16(val);
3819        }
3820#else
3821        if (endian == DEVICE_BIG_ENDIAN) {
3822            val = bswap16(val);
3823        }
3824#endif
3825    } else {
3826        /* RAM case */
3827        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3828                                & TARGET_PAGE_MASK)
3829                               + memory_region_section_addr(section, addr));
3830        switch (endian) {
3831        case DEVICE_LITTLE_ENDIAN:
3832            val = lduw_le_p(ptr);
3833            break;
3834        case DEVICE_BIG_ENDIAN:
3835            val = lduw_be_p(ptr);
3836            break;
3837        default:
3838            val = lduw_p(ptr);
3839            break;
3840        }
3841    }
3842    return val;
3843}
3844
3845uint32_t lduw_phys(target_phys_addr_t addr)
3846{
3847    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3848}
3849
3850uint32_t lduw_le_phys(target_phys_addr_t addr)
3851{
3852    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3853}
3854
3855uint32_t lduw_be_phys(target_phys_addr_t addr)
3856{
3857    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3858}
3859
3860/* warning: addr must be aligned. The RAM page is not marked as dirty
3861   and the code inside is not invalidated. This is useful when the dirty
3862   bits are used to track modified PTEs. */
3863void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3864{
3865    uint8_t *ptr;
3866    MemoryRegionSection *section;
3867
3868    section = phys_page_find(addr >> TARGET_PAGE_BITS);
3869
3870    if (!memory_region_is_ram(section->mr) || section->readonly) {
3871        addr = memory_region_section_addr(section, addr);
3872        if (memory_region_is_ram(section->mr)) {
3873            section = &phys_sections[phys_section_rom];
3874        }
3875        io_mem_write(section->mr, addr, val, 4);
3876    } else {
3877        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3878                               & TARGET_PAGE_MASK)
3879            + memory_region_section_addr(section, addr);
3880        ptr = qemu_get_ram_ptr(addr1);
3881        stl_p(ptr, val);
3882
3883        if (unlikely(in_migration)) {
3884            if (!cpu_physical_memory_is_dirty(addr1)) {
3885                /* invalidate code */
3886                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3887                /* set dirty bit */
3888                cpu_physical_memory_set_dirty_flags(
3889                    addr1, (0xff & ~CODE_DIRTY_FLAG));
3890            }
3891        }
3892    }
3893}
3894
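/* As stl_phys_notdirty(), but stores a 64-bit value. */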
3895void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3896{
3897    uint8_t *ptr;
3898    MemoryRegionSection *section;
3899
3900    section = phys_page_find(addr >> TARGET_PAGE_BITS);
3901
3902    if (!memory_region_is_ram(section->mr) || section->readonly) {
3903        addr = memory_region_section_addr(section, addr);
3904        if (memory_region_is_ram(section->mr)) {
3905            section = &phys_sections[phys_section_rom];
3906        }
3907#ifdef TARGET_WORDS_BIGENDIAN
3908        io_mem_write(section->mr, addr, val >> 32, 4);
3909        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3910#else
3911        io_mem_write(section->mr, addr, (uint32_t)val, 4);
3912        io_mem_write(section->mr, addr + 4, val >> 32, 4);
3913#endif
3914    } else {
3915        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3916                                & TARGET_PAGE_MASK)
3917                               + memory_region_section_addr(section, addr));
3918        stq_p(ptr, val);
3919    }
3920}
3921
3922/* warning: addr must be aligned */
3923static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
3924                                     enum device_endian endian)
3925{
3926    uint8_t *ptr;
3927    MemoryRegionSection *section;
3928
3929    section = phys_page_find(addr >> TARGET_PAGE_BITS);
3930
3931    if (!memory_region_is_ram(section->mr) || section->readonly) {
3932        addr = memory_region_section_addr(section, addr);
3933        if (memory_region_is_ram(section->mr)) {
3934            section = &phys_sections[phys_section_rom];
3935        }
3936#if defined(TARGET_WORDS_BIGENDIAN)
3937        if (endian == DEVICE_LITTLE_ENDIAN) {
3938            val = bswap32(val);
3939        }
3940#else
3941        if (endian == DEVICE_BIG_ENDIAN) {
3942            val = bswap32(val);
3943        }
3944#endif
3945        io_mem_write(section->mr, addr, val, 4);
3946    } else {
3947        unsigned long addr1;
3948        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3949            + memory_region_section_addr(section, addr);
3950        /* RAM case */
3951        ptr = qemu_get_ram_ptr(addr1);
3952        switch (endian) {
3953        case DEVICE_LITTLE_ENDIAN:
3954            stl_le_p(ptr, val);
3955            break;
3956        case DEVICE_BIG_ENDIAN:
3957            stl_be_p(ptr, val);
3958            break;
3959        default:
3960            stl_p(ptr, val);
3961            break;
3962        }
3963        if (!cpu_physical_memory_is_dirty(addr1)) {
3964            /* invalidate code */
3965            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3966            /* set dirty bit */
3967            cpu_physical_memory_set_dirty_flags(addr1,
3968                (0xff & ~CODE_DIRTY_FLAG));
3969        }
3970    }
3971}
3972
3973void stl_phys(target_phys_addr_t addr, uint32_t val)
3974{
3975    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3976}
3977
3978void stl_le_phys(target_phys_addr_t addr, uint32_t val)
3979{
3980    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3981}
3982
3983void stl_be_phys(target_phys_addr_t addr, uint32_t val)
3984{
3985    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3986}
3987
3988/* XXX: optimize */
3989void stb_phys(target_phys_addr_t addr, uint32_t val)
3990{
3991    uint8_t v = val;
3992    cpu_physical_memory_write(addr, &v, 1);
3993}
3994
3995/* warning: addr must be aligned */
3996static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
3997                                     enum device_endian endian)
3998{
3999    uint8_t *ptr;
4000    MemoryRegionSection *section;
4001
4002    section = phys_page_find(addr >> TARGET_PAGE_BITS);
4003
4004    if (!memory_region_is_ram(section->mr) || section->readonly) {
4005        addr = memory_region_section_addr(section, addr);
4006        if (memory_region_is_ram(section->mr)) {
4007            section = &phys_sections[phys_section_rom];
4008        }
4009#if defined(TARGET_WORDS_BIGENDIAN)
4010        if (endian == DEVICE_LITTLE_ENDIAN) {
4011            val = bswap16(val);
4012        }
4013#else
4014        if (endian == DEVICE_BIG_ENDIAN) {
4015            val = bswap16(val);
4016        }
4017#endif
4018        io_mem_write(section->mr, addr, val, 2);
4019    } else {
4020        unsigned long addr1;
4021        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4022            + memory_region_section_addr(section, addr);
4023        /* RAM case */
4024        ptr = qemu_get_ram_ptr(addr1);
4025        switch (endian) {
4026        case DEVICE_LITTLE_ENDIAN:
4027            stw_le_p(ptr, val);
4028            break;
4029        case DEVICE_BIG_ENDIAN:
4030            stw_be_p(ptr, val);
4031            break;
4032        default:
4033            stw_p(ptr, val);
4034            break;
4035        }
4036        if (!cpu_physical_memory_is_dirty(addr1)) {
4037            /* invalidate code */
4038            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4039            /* set dirty bit */
4040            cpu_physical_memory_set_dirty_flags(addr1,
4041                (0xff & ~CODE_DIRTY_FLAG));
4042        }
4043    }
4044}
4045
4046void stw_phys(target_phys_addr_t addr, uint32_t val)
4047{
4048    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4049}
4050
4051void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4052{
4053    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4054}
4055
4056void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4057{
4058    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4059}
4060
4061/* XXX: optimize */
4062void stq_phys(target_phys_addr_t addr, uint64_t val)
4063{
4064    val = tswap64(val);
4065    cpu_physical_memory_write(addr, &val, 8);
4066}
4067
4068void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4069{
4070    val = cpu_to_le64(val);
4071    cpu_physical_memory_write(addr, &val, 8);
4072}
4073
4074void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4075{
4076    val = cpu_to_be64(val);
4077    cpu_physical_memory_write(addr, &val, 8);
4078}
4079
4080/* virtual memory access for debug (includes writing to ROM) */
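/* This is the path used e.g. by the gdbstub and the monitor's memory
   commands; writes go through cpu_physical_memory_write_rom() so that
   ROM contents can be patched as well. */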
4081int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4082                        uint8_t *buf, int len, int is_write)
4083{
4084    int l;
4085    target_phys_addr_t phys_addr;
4086    target_ulong page;
4087
4088    while (len > 0) {
4089        page = addr & TARGET_PAGE_MASK;
4090        phys_addr = cpu_get_phys_page_debug(env, page);
4091        /* if no physical page mapped, return an error */
4092        if (phys_addr == -1)
4093            return -1;
4094        l = (page + TARGET_PAGE_SIZE) - addr;
4095        if (l > len)
4096            l = len;
4097        phys_addr += (addr & ~TARGET_PAGE_MASK);
4098        if (is_write)
4099            cpu_physical_memory_write_rom(phys_addr, buf, l);
4100        else
4101            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4102        len -= l;
4103        buf += l;
4104        addr += l;
4105    }
4106    return 0;
4107}
4108#endif
4109
4110/* In deterministic execution mode, instructions that perform device I/O
4111   must be at the end of the TB. */
4112void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4113{
4114    TranslationBlock *tb;
4115    uint32_t n, cflags;
4116    target_ulong pc, cs_base;
4117    uint64_t flags;
4118
4119    tb = tb_find_pc(retaddr);
4120    if (!tb) {
4121        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
4122                  (void *)retaddr);
4123    }
4124    n = env->icount_decr.u16.low + tb->icount;
4125    cpu_restore_state(tb, env, retaddr);
4126    /* Calculate how many instructions had been executed before the fault
4127       occurred.  */
4128    n = n - env->icount_decr.u16.low;
4129    /* Generate a new TB ending on the I/O insn.  */
4130    n++;
4131    /* On MIPS and SH, delay slot instructions can only be restarted if
4132       they were already the first instruction in the TB.  If this is not
4133       the first instruction in a TB then re-execute the preceding
4134       branch.  */
4135#if defined(TARGET_MIPS)
4136    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4137        env->active_tc.PC -= 4;
4138        env->icount_decr.u16.low++;
4139        env->hflags &= ~MIPS_HFLAG_BMASK;
4140    }
4141#elif defined(TARGET_SH4)
4142    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4143            && n > 1) {
4144        env->pc -= 2;
4145        env->icount_decr.u16.low++;
4146        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4147    }
4148#endif
4149    /* This should never happen.  */
4150    if (n > CF_COUNT_MASK)
4151        cpu_abort(env, "TB too big during recompile");
4152
4153    cflags = n | CF_LAST_IO;
4154    pc = tb->pc;
4155    cs_base = tb->cs_base;
4156    flags = tb->flags;
4157    tb_phys_invalidate(tb, -1);
4158    /* FIXME: In theory this could raise an exception.  In practice
4159       we have already translated the block once so it's probably ok.  */
4160    tb_gen_code(env, pc, cs_base, flags, cflags);
4161    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4162       the first in the TB) then we end up generating a whole new TB and
4163       repeating the fault, which is horribly inefficient.
4164       Better would be to execute just this insn uncached, or generate a
4165       second new TB.  */
4166    cpu_resume_from_signal(env, NULL);
4167}
4168
4169#if !defined(CONFIG_USER_ONLY)
4170
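/* Dump translation-buffer statistics (this is what the "info jit"
   monitor command prints). */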
4171void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4172{
4173    int i, target_code_size, max_target_code_size;
4174    int direct_jmp_count, direct_jmp2_count, cross_page;
4175    TranslationBlock *tb;
4176
4177    target_code_size = 0;
4178    max_target_code_size = 0;
4179    cross_page = 0;
4180    direct_jmp_count = 0;
4181    direct_jmp2_count = 0;
4182    for(i = 0; i < nb_tbs; i++) {
4183        tb = &tbs[i];
4184        target_code_size += tb->size;
4185        if (tb->size > max_target_code_size)
4186            max_target_code_size = tb->size;
4187        if (tb->page_addr[1] != -1)
4188            cross_page++;
4189        if (tb->tb_next_offset[0] != 0xffff) {
4190            direct_jmp_count++;
4191            if (tb->tb_next_offset[1] != 0xffff) {
4192                direct_jmp2_count++;
4193            }
4194        }
4195    }
4196    /* XXX: avoid using doubles? */
4197    cpu_fprintf(f, "Translation buffer state:\n");
4198    cpu_fprintf(f, "gen code size       %td/%ld\n",
4199                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4200    cpu_fprintf(f, "TB count            %d/%d\n", 
4201                nb_tbs, code_gen_max_blocks);
4202    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
4203                nb_tbs ? target_code_size / nb_tbs : 0,
4204                max_target_code_size);
4205    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
4206                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4207                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4208    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4209            cross_page,
4210            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4211    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
4212                direct_jmp_count,
4213                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4214                direct_jmp2_count,
4215                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4216    cpu_fprintf(f, "\nStatistics:\n");
4217    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
4218    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4219    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
4220    tcg_dump_info(f, cpu_fprintf);
4221}
4222
4223/*
4224 * A helper function for the _utterly broken_ virtio device model to find out if
4225 * it's running on a big endian machine. Don't do this at home kids!
4226 */
4227bool virtio_is_big_endian(void);
4228bool virtio_is_big_endian(void)
4229{
4230#if defined(TARGET_WORDS_BIGENDIAN)
4231    return true;
4232#else
4233    return false;
4234#endif
4235}
4236
4237#endif
4238
4239#ifndef CONFIG_USER_ONLY
4240bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4241{
4242    MemoryRegionSection *section;
4243
4244    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4245
4246    return !(memory_region_is_ram(section->mr) ||
4247             memory_region_is_romd(section->mr));
4248}
4249#endif
4250