qemu/exec.c
   1/*
   2 *  virtual page mapping and translated block handling
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifdef _WIN32
  21#include <windows.h>
  22#else
  23#include <sys/types.h>
  24#include <sys/mman.h>
  25#endif
  26
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "tcg.h"
  30#include "hw/hw.h"
  31#include "hw/qdev.h"
  32#include "osdep.h"
  33#include "kvm.h"
  34#include "hw/xen.h"
  35#include "qemu-timer.h"
  36#include "memory.h"
  37#include "dma.h"
  38#include "exec-memory.h"
  39#if defined(CONFIG_USER_ONLY)
  40#include <qemu.h>
  41#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  42#include <sys/param.h>
  43#if __FreeBSD_version >= 700104
  44#define HAVE_KINFO_GETVMMAP
  45#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  46#include <sys/time.h>
  47#include <sys/proc.h>
  48#include <machine/profile.h>
  49#define _KERNEL
  50#include <sys/user.h>
  51#undef _KERNEL
  52#undef sigqueue
  53#include <libutil.h>
  54#endif
  55#endif
  56#else /* !CONFIG_USER_ONLY */
  57#include "xen-mapcache.h"
  58#include "trace.h"
  59#endif
  60
  61#include "cputlb.h"
  62
  63#include "memory-internal.h"
  64
  65//#define DEBUG_TB_INVALIDATE
  66//#define DEBUG_FLUSH
  67//#define DEBUG_UNASSIGNED
  68
  69/* make various TB consistency checks */
  70//#define DEBUG_TB_CHECK
  71
  72//#define DEBUG_IOPORT
  73//#define DEBUG_SUBPAGE
  74
  75#if !defined(CONFIG_USER_ONLY)
  76/* TB consistency checks only implemented for usermode emulation.  */
  77#undef DEBUG_TB_CHECK
  78#endif
  79
  80#define SMC_BITMAP_USE_THRESHOLD 10
  81
  82static TranslationBlock *tbs;
  83static int code_gen_max_blocks;
  84TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
  85static int nb_tbs;
  86/* any access to the tbs or the page table must use this lock */
  87spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
  88
  89uint8_t *code_gen_prologue;
  90static uint8_t *code_gen_buffer;
  91static size_t code_gen_buffer_size;
  92/* threshold to flush the translated code buffer */
  93static size_t code_gen_buffer_max_size;
  94static uint8_t *code_gen_ptr;
  95
  96#if !defined(CONFIG_USER_ONLY)
  97int phys_ram_fd;
  98static int in_migration;
  99
 100RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
 101
 102static MemoryRegion *system_memory;
 103static MemoryRegion *system_io;
 104
 105AddressSpace address_space_io;
 106AddressSpace address_space_memory;
 107DMAContext dma_context_memory;
 108
 109MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
 110static MemoryRegion io_mem_subpage_ram;
 111
 112#endif
 113
 114CPUArchState *first_cpu;
 115/* current CPU in the current thread. It is only valid inside
 116   cpu_exec() */
 117DEFINE_TLS(CPUArchState *,cpu_single_env);
 118/* 0 = Do not count executed instructions.
 119   1 = Precise instruction counting.
 120   2 = Adaptive rate instruction counting.  */
 121int use_icount = 0;
 122
 123typedef struct PageDesc {
 124    /* list of TBs intersecting this ram page */
 125    TranslationBlock *first_tb;
  126    /* in order to optimize self-modifying code, we count the number
 127       of lookups we do to a given page to use a bitmap */
 128    unsigned int code_write_count;
 129    uint8_t *code_bitmap;
 130#if defined(CONFIG_USER_ONLY)
 131    unsigned long flags;
 132#endif
 133} PageDesc;
 134
 135/* In system mode we want L1_MAP to be based on ram offsets,
 136   while in user mode we want it to be based on virtual addresses.  */
 137#if !defined(CONFIG_USER_ONLY)
 138#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 139# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 140#else
 141# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 142#endif
 143#else
 144# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
 145#endif
 146
 147/* Size of the L2 (and L3, etc) page tables.  */
 148#define L2_BITS 10
 149#define L2_SIZE (1 << L2_BITS)
 150
 151#define P_L2_LEVELS \
 152    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
 153
 154/* The bits remaining after N lower levels of page tables.  */
 155#define V_L1_BITS_REM \
 156    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
 157
 158#if V_L1_BITS_REM < 4
 159#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
 160#else
 161#define V_L1_BITS  V_L1_BITS_REM
 162#endif
 163
 164#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
 165
 166#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
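
/* Worked example (illustrative, not target-specific): a 36-bit map with
   4 KiB pages leaves 24 index bits; 24 % 10 == 4, so V_L1_BITS == 4 (a
   16-entry top-level array) and two further 10-bit levels cover the rest
   (V_L1_SHIFT == 20).  The "< 4" test above only avoids a uselessly tiny
   top level: if the remainder were smaller, it would be folded into a
   (remainder + 10)-bit top level instead.  */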
 167
 168uintptr_t qemu_real_host_page_size;
 169uintptr_t qemu_host_page_size;
 170uintptr_t qemu_host_page_mask;
 171
 172/* This is a multi-level map on the virtual address space.
 173   The bottom level has pointers to PageDesc.  */
 174static void *l1_map[V_L1_SIZE];
 175
 176#if !defined(CONFIG_USER_ONLY)
 177
 178static MemoryRegionSection *phys_sections;
 179static unsigned phys_sections_nb, phys_sections_nb_alloc;
 180static uint16_t phys_section_unassigned;
 181static uint16_t phys_section_notdirty;
 182static uint16_t phys_section_rom;
 183static uint16_t phys_section_watch;
 184
 185/* Simple allocator for PhysPageEntry nodes */
 186static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 187static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
 188
 189#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 190
 191static void io_mem_init(void);
 192static void memory_map_init(void);
 193static void *qemu_safe_ram_ptr(ram_addr_t addr);
 194
 195static MemoryRegion io_mem_watch;
 196#endif
 197static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 198                         tb_page_addr_t phys_page2);
 199
 200/* statistics */
 201static int tb_flush_count;
 202static int tb_phys_invalidate_count;
 203
 204#ifdef _WIN32
 205static inline void map_exec(void *addr, long size)
 206{
 207    DWORD old_protect;
 208    VirtualProtect(addr, size,
 209                   PAGE_EXECUTE_READWRITE, &old_protect);
 210    
 211}
 212#else
 213static inline void map_exec(void *addr, long size)
 214{
 215    unsigned long start, end, page_size;
 216    
 217    page_size = getpagesize();
 218    start = (unsigned long)addr;
 219    start &= ~(page_size - 1);
 220    
 221    end = (unsigned long)addr + size;
 222    end += page_size - 1;
 223    end &= ~(page_size - 1);
 224    
 225    mprotect((void *)start, end - start,
 226             PROT_READ | PROT_WRITE | PROT_EXEC);
 227}
 228#endif
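
/* Example (illustrative): map_exec((void *)0x402010, 0x100) with 4 KiB host
   pages rounds start down to 0x402000 and end up to 0x403000, so the whole
   page containing the range becomes read/write/execute.  */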
 229
 230static void page_init(void)
 231{
 232    /* NOTE: we can always suppose that qemu_host_page_size >=
 233       TARGET_PAGE_SIZE */
 234#ifdef _WIN32
 235    {
 236        SYSTEM_INFO system_info;
 237
 238        GetSystemInfo(&system_info);
 239        qemu_real_host_page_size = system_info.dwPageSize;
 240    }
 241#else
 242    qemu_real_host_page_size = getpagesize();
 243#endif
 244    if (qemu_host_page_size == 0)
 245        qemu_host_page_size = qemu_real_host_page_size;
 246    if (qemu_host_page_size < TARGET_PAGE_SIZE)
 247        qemu_host_page_size = TARGET_PAGE_SIZE;
 248    qemu_host_page_mask = ~(qemu_host_page_size - 1);
 249
 250#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 251    {
 252#ifdef HAVE_KINFO_GETVMMAP
 253        struct kinfo_vmentry *freep;
 254        int i, cnt;
 255
 256        freep = kinfo_getvmmap(getpid(), &cnt);
 257        if (freep) {
 258            mmap_lock();
 259            for (i = 0; i < cnt; i++) {
 260                unsigned long startaddr, endaddr;
 261
 262                startaddr = freep[i].kve_start;
 263                endaddr = freep[i].kve_end;
 264                if (h2g_valid(startaddr)) {
 265                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 266
 267                    if (h2g_valid(endaddr)) {
 268                        endaddr = h2g(endaddr);
 269                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 270                    } else {
 271#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 272                        endaddr = ~0ul;
 273                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 274#endif
 275                    }
 276                }
 277            }
 278            free(freep);
 279            mmap_unlock();
 280        }
 281#else
 282        FILE *f;
 283
 284        last_brk = (unsigned long)sbrk(0);
 285
 286        f = fopen("/compat/linux/proc/self/maps", "r");
 287        if (f) {
 288            mmap_lock();
 289
 290            do {
 291                unsigned long startaddr, endaddr;
 292                int n;
 293
 294                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 295
 296                if (n == 2 && h2g_valid(startaddr)) {
 297                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 298
 299                    if (h2g_valid(endaddr)) {
 300                        endaddr = h2g(endaddr);
 301                    } else {
 302                        endaddr = ~0ul;
 303                    }
 304                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 305                }
 306            } while (!feof(f));
 307
 308            fclose(f);
 309            mmap_unlock();
 310        }
 311#endif
 312    }
 313#endif
 314}
 315
 316static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 317{
 318    PageDesc *pd;
 319    void **lp;
 320    int i;
 321
 322#if defined(CONFIG_USER_ONLY)
 323    /* We can't use g_malloc because it may recurse into a locked mutex. */
 324# define ALLOC(P, SIZE)                                 \
 325    do {                                                \
 326        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
 327                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
 328    } while (0)
 329#else
 330# define ALLOC(P, SIZE) \
 331    do { P = g_malloc0(SIZE); } while (0)
 332#endif
 333
 334    /* Level 1.  Always allocated.  */
 335    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
 336
 337    /* Level 2..N-1.  */
 338    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
 339        void **p = *lp;
 340
 341        if (p == NULL) {
 342            if (!alloc) {
 343                return NULL;
 344            }
 345            ALLOC(p, sizeof(void *) * L2_SIZE);
 346            *lp = p;
 347        }
 348
 349        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
 350    }
 351
 352    pd = *lp;
 353    if (pd == NULL) {
 354        if (!alloc) {
 355            return NULL;
 356        }
 357        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
 358        *lp = pd;
 359    }
 360
 361#undef ALLOC
 362
 363    return pd + (index & (L2_SIZE - 1));
 364}
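
/* Continuing the illustrative 36-bit layout above: bits [23:20] of the page
   index select the l1_map slot, bits [19:10] the intermediate table, and
   bits [9:0] the PageDesc inside the leaf array.  The mmap()-based ALLOC in
   the user-mode build exists because, as the comment above notes, this path
   may be reached while the allocator's mutex is already held.  */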
 365
 366static inline PageDesc *page_find(tb_page_addr_t index)
 367{
 368    return page_find_alloc(index, 0);
 369}
 370
 371#if !defined(CONFIG_USER_ONLY)
 372
 373static void phys_map_node_reserve(unsigned nodes)
 374{
 375    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
 376        typedef PhysPageEntry Node[L2_SIZE];
 377        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
 378        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
 379                                      phys_map_nodes_nb + nodes);
 380        phys_map_nodes = g_renew(Node, phys_map_nodes,
 381                                 phys_map_nodes_nb_alloc);
 382    }
 383}
 384
 385static uint16_t phys_map_node_alloc(void)
 386{
 387    unsigned i;
 388    uint16_t ret;
 389
 390    ret = phys_map_nodes_nb++;
 391    assert(ret != PHYS_MAP_NODE_NIL);
 392    assert(ret != phys_map_nodes_nb_alloc);
 393    for (i = 0; i < L2_SIZE; ++i) {
 394        phys_map_nodes[ret][i].is_leaf = 0;
 395        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 396    }
 397    return ret;
 398}
 399
 400static void phys_map_nodes_reset(void)
 401{
 402    phys_map_nodes_nb = 0;
 403}
 404
 405
 406static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
 407                                hwaddr *nb, uint16_t leaf,
 408                                int level)
 409{
 410    PhysPageEntry *p;
 411    int i;
 412    hwaddr step = (hwaddr)1 << (level * L2_BITS);
 413
 414    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
 415        lp->ptr = phys_map_node_alloc();
 416        p = phys_map_nodes[lp->ptr];
 417        if (level == 0) {
 418            for (i = 0; i < L2_SIZE; i++) {
 419                p[i].is_leaf = 1;
 420                p[i].ptr = phys_section_unassigned;
 421            }
 422        }
 423    } else {
 424        p = phys_map_nodes[lp->ptr];
 425    }
 426    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 427
 428    while (*nb && lp < &p[L2_SIZE]) {
 429        if ((*index & (step - 1)) == 0 && *nb >= step) {
 430            lp->is_leaf = true;
 431            lp->ptr = leaf;
 432            *index += step;
 433            *nb -= step;
 434        } else {
 435            phys_page_set_level(lp, index, nb, leaf, level - 1);
 436        }
 437        ++lp;
 438    }
 439}
 440
 441static void phys_page_set(AddressSpaceDispatch *d,
 442                          hwaddr index, hwaddr nb,
 443                          uint16_t leaf)
 444{
 445    /* Wildly overreserve - it doesn't matter much. */
 446    phys_map_node_reserve(3 * P_L2_LEVELS);
 447
 448    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 449}
 450
 451MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
 452{
 453    PhysPageEntry lp = d->phys_map;
 454    PhysPageEntry *p;
 455    int i;
 456    uint16_t s_index = phys_section_unassigned;
 457
 458    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
 459        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 460            goto not_found;
 461        }
 462        p = phys_map_nodes[lp.ptr];
 463        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
 464    }
 465
 466    s_index = lp.ptr;
 467not_found:
 468    return &phys_sections[s_index];
 469}
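
/* A lookup that walks off a PHYS_MAP_NODE_NIL pointer falls back to
   phys_section_unassigned.  Usage sketch (illustrative only; 'd' is an
   AddressSpaceDispatch and 's' an index into phys_sections[]): */
#if 0
    phys_page_set(d, 1, 16, s);                       /* pages 1..16 -> s     */
    MemoryRegionSection *sec = phys_page_find(d, 4);  /* -> &phys_sections[s] */
#endif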
 470
 471bool memory_region_is_unassigned(MemoryRegion *mr)
 472{
 473    return mr != &io_mem_ram && mr != &io_mem_rom
 474        && mr != &io_mem_notdirty && !mr->rom_device
 475        && mr != &io_mem_watch;
 476}
 477
 478#define mmap_lock() do { } while(0)
 479#define mmap_unlock() do { } while(0)
 480#endif
 481
 482#if defined(CONFIG_USER_ONLY)
 483/* Currently it is not recommended to allocate big chunks of data in
  484   user mode. This will change when a dedicated libc is used.  */
 485/* ??? 64-bit hosts ought to have no problem mmaping data outside the
 486   region in which the guest needs to run.  Revisit this.  */
 487#define USE_STATIC_CODE_GEN_BUFFER
 488#endif
 489
 490/* ??? Should configure for this, not list operating systems here.  */
 491#if (defined(__linux__) \
 492    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
 493    || defined(__DragonFly__) || defined(__OpenBSD__) \
 494    || defined(__NetBSD__))
 495# define USE_MMAP
 496#endif
 497
 498/* Minimum size of the code gen buffer.  This number is randomly chosen,
 499   but not so small that we can't have a fair number of TB's live.  */
 500#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
 501
 502/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
 503   indicated, this is constrained by the range of direct branches on the
 504   host cpu, as used by the TCG implementation of goto_tb.  */
 505#if defined(__x86_64__)
 506# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 507#elif defined(__sparc__)
 508# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 509#elif defined(__arm__)
 510# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
 511#elif defined(__s390x__)
 512  /* We have a +- 4GB range on the branches; leave some slop.  */
 513# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
 514#else
 515# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 516#endif
 517
 518#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
 519
 520#define DEFAULT_CODE_GEN_BUFFER_SIZE \
 521  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
 522   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 523
 524static inline size_t size_code_gen_buffer(size_t tb_size)
 525{
 526    /* Size the buffer.  */
 527    if (tb_size == 0) {
 528#ifdef USE_STATIC_CODE_GEN_BUFFER
 529        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 530#else
 531        /* ??? Needs adjustments.  */
 532        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
 533           static buffer, we could size this on RESERVED_VA, on the text
 534           segment size of the executable, or continue to use the default.  */
 535        tb_size = (unsigned long)(ram_size / 4);
 536#endif
 537    }
 538    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
 539        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
 540    }
 541    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
 542        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
 543    }
 544    code_gen_buffer_size = tb_size;
 545    return tb_size;
 546}
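
/* Examples (illustrative): with the static buffer, tb_size == 0 selects the
   32 MiB default; in the dynamic case it starts from ram_size / 4.  A request
   of 512 KiB is raised to the 1 MiB minimum, and anything above the host's
   MAX_CODE_GEN_BUFFER_SIZE is clamped down to it.  */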
 547
 548#ifdef USE_STATIC_CODE_GEN_BUFFER
 549static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 550    __attribute__((aligned(CODE_GEN_ALIGN)));
 551
 552static inline void *alloc_code_gen_buffer(void)
 553{
 554    map_exec(static_code_gen_buffer, code_gen_buffer_size);
 555    return static_code_gen_buffer;
 556}
 557#elif defined(USE_MMAP)
 558static inline void *alloc_code_gen_buffer(void)
 559{
 560    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
 561    uintptr_t start = 0;
 562    void *buf;
 563
 564    /* Constrain the position of the buffer based on the host cpu.
 565       Note that these addresses are chosen in concert with the
 566       addresses assigned in the relevant linker script file.  */
 567# if defined(__PIE__) || defined(__PIC__)
 568    /* Don't bother setting a preferred location if we're building
 569       a position-independent executable.  We're more likely to get
 570       an address near the main executable if we let the kernel
 571       choose the address.  */
 572# elif defined(__x86_64__) && defined(MAP_32BIT)
 573    /* Force the memory down into low memory with the executable.
 574       Leave the choice of exact location with the kernel.  */
 575    flags |= MAP_32BIT;
 576    /* Cannot expect to map more than 800MB in low memory.  */
 577    if (code_gen_buffer_size > 800u * 1024 * 1024) {
 578        code_gen_buffer_size = 800u * 1024 * 1024;
 579    }
 580# elif defined(__sparc__)
 581    start = 0x40000000ul;
 582# elif defined(__s390x__)
 583    start = 0x90000000ul;
 584# endif
 585
 586    buf = mmap((void *)start, code_gen_buffer_size,
 587               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
 588    return buf == MAP_FAILED ? NULL : buf;
 589}
 590#else
 591static inline void *alloc_code_gen_buffer(void)
 592{
 593    void *buf = g_malloc(code_gen_buffer_size);
 594    if (buf) {
 595        map_exec(buf, code_gen_buffer_size);
 596    }
 597    return buf;
 598}
 599#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
 600
 601static inline void code_gen_alloc(size_t tb_size)
 602{
 603    code_gen_buffer_size = size_code_gen_buffer(tb_size);
 604    code_gen_buffer = alloc_code_gen_buffer();
 605    if (code_gen_buffer == NULL) {
 606        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
 607        exit(1);
 608    }
 609
 610    /* Steal room for the prologue at the end of the buffer.  This ensures
 611       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
 612       from TB's to the prologue are going to be in range.  It also means
 613       that we don't need to mark (additional) portions of the data segment
 614       as executable.  */
 615    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
 616    code_gen_buffer_size -= 1024;
 617
 618    code_gen_buffer_max_size = code_gen_buffer_size -
 619        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
 620    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
 621    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
 622}
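
/* Sizing example (illustrative): with the default 32 MiB buffer, the final
   1 KiB is handed to the TCG prologue, code_gen_buffer_max_size further
   reserves TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes of slack so a TB that is
   mid-generation cannot run off the end, and code_gen_max_blocks is simply
   the usable size divided by CODE_GEN_AVG_BLOCK_SIZE.  */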
 623
 624/* Must be called before using the QEMU cpus. 'tb_size' is the size
 625   (in bytes) allocated to the translation buffer. Zero means default
 626   size. */
 627void tcg_exec_init(unsigned long tb_size)
 628{
 629    cpu_gen_init();
 630    code_gen_alloc(tb_size);
 631    code_gen_ptr = code_gen_buffer;
 632    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
 633    page_init();
 634#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
 635    /* There's no guest base to take into account, so go ahead and
 636       initialize the prologue now.  */
 637    tcg_prologue_init(&tcg_ctx);
 638#endif
 639}
 640
 641bool tcg_enabled(void)
 642{
 643    return code_gen_buffer != NULL;
 644}
 645
 646void cpu_exec_init_all(void)
 647{
 648#if !defined(CONFIG_USER_ONLY)
 649    memory_map_init();
 650    io_mem_init();
 651#endif
 652}
 653
 654#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 655
 656static int cpu_common_post_load(void *opaque, int version_id)
 657{
 658    CPUArchState *env = opaque;
 659
 660    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 661       version_id is increased. */
 662    env->interrupt_request &= ~0x01;
 663    tlb_flush(env, 1);
 664
 665    return 0;
 666}
 667
 668static const VMStateDescription vmstate_cpu_common = {
 669    .name = "cpu_common",
 670    .version_id = 1,
 671    .minimum_version_id = 1,
 672    .minimum_version_id_old = 1,
 673    .post_load = cpu_common_post_load,
 674    .fields      = (VMStateField []) {
 675        VMSTATE_UINT32(halted, CPUArchState),
 676        VMSTATE_UINT32(interrupt_request, CPUArchState),
 677        VMSTATE_END_OF_LIST()
 678    }
 679};
 680#endif
 681
 682CPUArchState *qemu_get_cpu(int cpu)
 683{
 684    CPUArchState *env = first_cpu;
 685
 686    while (env) {
 687        if (env->cpu_index == cpu)
 688            break;
 689        env = env->next_cpu;
 690    }
 691
 692    return env;
 693}
 694
 695void cpu_exec_init(CPUArchState *env)
 696{
 697#ifndef CONFIG_USER_ONLY
 698    CPUState *cpu = ENV_GET_CPU(env);
 699#endif
 700    CPUArchState **penv;
 701    int cpu_index;
 702
 703#if defined(CONFIG_USER_ONLY)
 704    cpu_list_lock();
 705#endif
 706    env->next_cpu = NULL;
 707    penv = &first_cpu;
 708    cpu_index = 0;
 709    while (*penv != NULL) {
 710        penv = &(*penv)->next_cpu;
 711        cpu_index++;
 712    }
 713    env->cpu_index = cpu_index;
 714    env->numa_node = 0;
 715    QTAILQ_INIT(&env->breakpoints);
 716    QTAILQ_INIT(&env->watchpoints);
 717#ifndef CONFIG_USER_ONLY
 718    cpu->thread_id = qemu_get_thread_id();
 719#endif
 720    *penv = env;
 721#if defined(CONFIG_USER_ONLY)
 722    cpu_list_unlock();
 723#endif
 724#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 725    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
 726    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 727                    cpu_save, cpu_load, env);
 728#endif
 729}
 730
 731/* Allocate a new translation block. Flush the translation buffer if
 732   too many translation blocks or too much generated code. */
 733static TranslationBlock *tb_alloc(target_ulong pc)
 734{
 735    TranslationBlock *tb;
 736
 737    if (nb_tbs >= code_gen_max_blocks ||
 738        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
 739        return NULL;
 740    tb = &tbs[nb_tbs++];
 741    tb->pc = pc;
 742    tb->cflags = 0;
 743    return tb;
 744}
 745
 746void tb_free(TranslationBlock *tb)
 747{
  748    /* In practice this is mostly used for single-use temporary TBs.
 749       Ignore the hard cases and just back up if this TB happens to
 750       be the last one generated.  */
 751    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
 752        code_gen_ptr = tb->tc_ptr;
 753        nb_tbs--;
 754    }
 755}
 756
 757static inline void invalidate_page_bitmap(PageDesc *p)
 758{
 759    if (p->code_bitmap) {
 760        g_free(p->code_bitmap);
 761        p->code_bitmap = NULL;
 762    }
 763    p->code_write_count = 0;
 764}
 765
 766/* Set to NULL all the 'first_tb' fields in all PageDescs. */
 767
 768static void page_flush_tb_1 (int level, void **lp)
 769{
 770    int i;
 771
 772    if (*lp == NULL) {
 773        return;
 774    }
 775    if (level == 0) {
 776        PageDesc *pd = *lp;
 777        for (i = 0; i < L2_SIZE; ++i) {
 778            pd[i].first_tb = NULL;
 779            invalidate_page_bitmap(pd + i);
 780        }
 781    } else {
 782        void **pp = *lp;
 783        for (i = 0; i < L2_SIZE; ++i) {
 784            page_flush_tb_1 (level - 1, pp + i);
 785        }
 786    }
 787}
 788
 789static void page_flush_tb(void)
 790{
 791    int i;
 792    for (i = 0; i < V_L1_SIZE; i++) {
 793        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
 794    }
 795}
 796
 797/* flush all the translation blocks */
 798/* XXX: tb_flush is currently not thread safe */
 799void tb_flush(CPUArchState *env1)
 800{
 801    CPUArchState *env;
 802#if defined(DEBUG_FLUSH)
 803    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
 804           (unsigned long)(code_gen_ptr - code_gen_buffer),
 805           nb_tbs, nb_tbs > 0 ?
 806           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
 807#endif
 808    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
 809        cpu_abort(env1, "Internal error: code buffer overflow\n");
 810
 811    nb_tbs = 0;
 812
 813    for(env = first_cpu; env != NULL; env = env->next_cpu) {
 814        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
 815    }
 816
 817    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
 818    page_flush_tb();
 819
 820    code_gen_ptr = code_gen_buffer;
 821    /* XXX: flush processor icache at this point if cache flush is
 822       expensive */
 823    tb_flush_count++;
 824}
 825
 826#ifdef DEBUG_TB_CHECK
 827
 828static void tb_invalidate_check(target_ulong address)
 829{
 830    TranslationBlock *tb;
 831    int i;
 832    address &= TARGET_PAGE_MASK;
 833    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
 834        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
 835            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
 836                  address >= tb->pc + tb->size)) {
 837                printf("ERROR invalidate: address=" TARGET_FMT_lx
 838                       " PC=%08lx size=%04x\n",
 839                       address, (long)tb->pc, tb->size);
 840            }
 841        }
 842    }
 843}
 844
 845/* verify that all the pages have correct rights for code */
 846static void tb_page_check(void)
 847{
 848    TranslationBlock *tb;
 849    int i, flags1, flags2;
 850
 851    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
 852        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
 853            flags1 = page_get_flags(tb->pc);
 854            flags2 = page_get_flags(tb->pc + tb->size - 1);
 855            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
 856                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
 857                       (long)tb->pc, tb->size, flags1, flags2);
 858            }
 859        }
 860    }
 861}
 862
 863#endif
 864
 865/* invalidate one TB */
 866static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
 867                             int next_offset)
 868{
 869    TranslationBlock *tb1;
 870    for(;;) {
 871        tb1 = *ptb;
 872        if (tb1 == tb) {
 873            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
 874            break;
 875        }
 876        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
 877    }
 878}
 879
 880static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
 881{
 882    TranslationBlock *tb1;
 883    unsigned int n1;
 884
 885    for(;;) {
 886        tb1 = *ptb;
 887        n1 = (uintptr_t)tb1 & 3;
 888        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 889        if (tb1 == tb) {
 890            *ptb = tb1->page_next[n1];
 891            break;
 892        }
 893        ptb = &tb1->page_next[n1];
 894    }
 895}
 896
 897static inline void tb_jmp_remove(TranslationBlock *tb, int n)
 898{
 899    TranslationBlock *tb1, **ptb;
 900    unsigned int n1;
 901
 902    ptb = &tb->jmp_next[n];
 903    tb1 = *ptb;
 904    if (tb1) {
 905        /* find tb(n) in circular list */
 906        for(;;) {
 907            tb1 = *ptb;
 908            n1 = (uintptr_t)tb1 & 3;
 909            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 910            if (n1 == n && tb1 == tb)
 911                break;
 912            if (n1 == 2) {
 913                ptb = &tb1->jmp_first;
 914            } else {
 915                ptb = &tb1->jmp_next[n1];
 916            }
 917        }
 918        /* now we can suppress tb(n) from the list */
 919        *ptb = tb->jmp_next[n];
 920
 921        tb->jmp_next[n] = NULL;
 922    }
 923}
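
/* Note on the encoding used by the helpers above: the low two bits of each
   list pointer carry an index.  page_next[] links are tagged with the page
   slot (0 or 1) of the next TB, jmp_next[]/jmp_first links with the jump
   slot, and tag value 2 marks the head of the circular jump list (jmp_first
   points back at the owning TB, tagged with 2).  Masking with ~3 recovers
   the real TranslationBlock pointer.  */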
 924
 925/* reset the jump entry 'n' of a TB so that it is not chained to
 926   another TB */
 927static inline void tb_reset_jump(TranslationBlock *tb, int n)
 928{
 929    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
 930}
 931
 932void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 933{
 934    CPUArchState *env;
 935    PageDesc *p;
 936    unsigned int h, n1;
 937    tb_page_addr_t phys_pc;
 938    TranslationBlock *tb1, *tb2;
 939
 940    /* remove the TB from the hash list */
 941    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
 942    h = tb_phys_hash_func(phys_pc);
 943    tb_remove(&tb_phys_hash[h], tb,
 944              offsetof(TranslationBlock, phys_hash_next));
 945
 946    /* remove the TB from the page list */
 947    if (tb->page_addr[0] != page_addr) {
 948        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 949        tb_page_remove(&p->first_tb, tb);
 950        invalidate_page_bitmap(p);
 951    }
 952    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
 953        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 954        tb_page_remove(&p->first_tb, tb);
 955        invalidate_page_bitmap(p);
 956    }
 957
 958    tb_invalidated_flag = 1;
 959
 960    /* remove the TB from the hash list */
 961    h = tb_jmp_cache_hash_func(tb->pc);
 962    for(env = first_cpu; env != NULL; env = env->next_cpu) {
 963        if (env->tb_jmp_cache[h] == tb)
 964            env->tb_jmp_cache[h] = NULL;
 965    }
 966
 967    /* suppress this TB from the two jump lists */
 968    tb_jmp_remove(tb, 0);
 969    tb_jmp_remove(tb, 1);
 970
 971    /* suppress any remaining jumps to this TB */
 972    tb1 = tb->jmp_first;
 973    for(;;) {
 974        n1 = (uintptr_t)tb1 & 3;
 975        if (n1 == 2)
 976            break;
 977        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 978        tb2 = tb1->jmp_next[n1];
 979        tb_reset_jump(tb1, n1);
 980        tb1->jmp_next[n1] = NULL;
 981        tb1 = tb2;
 982    }
 983    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
 984
 985    tb_phys_invalidate_count++;
 986}
 987
 988static inline void set_bits(uint8_t *tab, int start, int len)
 989{
 990    int end, mask, end1;
 991
 992    end = start + len;
 993    tab += start >> 3;
 994    mask = 0xff << (start & 7);
 995    if ((start & ~7) == (end & ~7)) {
 996        if (start < end) {
 997            mask &= ~(0xff << (end & 7));
 998            *tab |= mask;
 999        }
1000    } else {
1001        *tab++ |= mask;
1002        start = (start + 8) & ~7;
1003        end1 = end & ~7;
1004        while (start < end1) {
1005            *tab++ = 0xff;
1006            start += 8;
1007        }
1008        if (start < end) {
1009            mask = ~(0xff << (end & 7));
1010            *tab |= mask;
1011        }
1012    }
1013}
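
/* Worked example (illustrative): set_bits(tab, 3, 7) marks bits 3..9, which
   straddles a byte boundary and so takes the "else" branch:
       tab[0] |= 0xf8;    bits 3-7
       tab[1] |= 0x03;    bits 8-9
   A range within one byte, e.g. set_bits(tab, 2, 3), takes the first branch
   and ORs in the single mask 0x1c.  */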
1014
1015static void build_page_bitmap(PageDesc *p)
1016{
1017    int n, tb_start, tb_end;
1018    TranslationBlock *tb;
1019
1020    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1021
1022    tb = p->first_tb;
1023    while (tb != NULL) {
1024        n = (uintptr_t)tb & 3;
1025        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1026        /* NOTE: this is subtle as a TB may span two physical pages */
1027        if (n == 0) {
1028            /* NOTE: tb_end may be after the end of the page, but
1029               it is not a problem */
1030            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1031            tb_end = tb_start + tb->size;
1032            if (tb_end > TARGET_PAGE_SIZE)
1033                tb_end = TARGET_PAGE_SIZE;
1034        } else {
1035            tb_start = 0;
1036            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1037        }
1038        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1039        tb = tb->page_next[n];
1040    }
1041}
1042
1043TranslationBlock *tb_gen_code(CPUArchState *env,
1044                              target_ulong pc, target_ulong cs_base,
1045                              int flags, int cflags)
1046{
1047    TranslationBlock *tb;
1048    uint8_t *tc_ptr;
1049    tb_page_addr_t phys_pc, phys_page2;
1050    target_ulong virt_page2;
1051    int code_gen_size;
1052
1053    phys_pc = get_page_addr_code(env, pc);
1054    tb = tb_alloc(pc);
1055    if (!tb) {
1056        /* flush must be done */
1057        tb_flush(env);
1058        /* cannot fail at this point */
1059        tb = tb_alloc(pc);
1060        /* Don't forget to invalidate previous TB info.  */
1061        tb_invalidated_flag = 1;
1062    }
1063    tc_ptr = code_gen_ptr;
1064    tb->tc_ptr = tc_ptr;
1065    tb->cs_base = cs_base;
1066    tb->flags = flags;
1067    tb->cflags = cflags;
1068    cpu_gen_code(env, tb, &code_gen_size);
1069    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1070                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1071
1072    /* check next page if needed */
1073    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1074    phys_page2 = -1;
1075    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1076        phys_page2 = get_page_addr_code(env, virt_page2);
1077    }
1078    tb_link_page(tb, phys_pc, phys_page2);
1079    return tb;
1080}
1081
1082/*
1083 * Invalidate all TBs which intersect with the target physical address range
1084 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1085 * 'is_cpu_write_access' should be true if called from a real cpu write
1086 * access: the virtual CPU will exit the current TB if code is modified inside
1087 * this TB.
1088 */
1089void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1090                              int is_cpu_write_access)
1091{
1092    while (start < end) {
1093        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1094        start &= TARGET_PAGE_MASK;
1095        start += TARGET_PAGE_SIZE;
1096    }
1097}
1098
1099/*
1100 * Invalidate all TBs which intersect with the target physical address range
1101 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1102 * 'is_cpu_write_access' should be true if called from a real cpu write
1103 * access: the virtual CPU will exit the current TB if code is modified inside
1104 * this TB.
1105 */
1106void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1107                                   int is_cpu_write_access)
1108{
1109    TranslationBlock *tb, *tb_next, *saved_tb;
1110    CPUArchState *env = cpu_single_env;
1111    tb_page_addr_t tb_start, tb_end;
1112    PageDesc *p;
1113    int n;
1114#ifdef TARGET_HAS_PRECISE_SMC
1115    int current_tb_not_found = is_cpu_write_access;
1116    TranslationBlock *current_tb = NULL;
1117    int current_tb_modified = 0;
1118    target_ulong current_pc = 0;
1119    target_ulong current_cs_base = 0;
1120    int current_flags = 0;
1121#endif /* TARGET_HAS_PRECISE_SMC */
1122
1123    p = page_find(start >> TARGET_PAGE_BITS);
1124    if (!p)
1125        return;
1126    if (!p->code_bitmap &&
1127        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1128        is_cpu_write_access) {
1129        /* build code bitmap */
1130        build_page_bitmap(p);
1131    }
1132
1133    /* we remove all the TBs in the range [start, end[ */
1134    /* XXX: see if in some cases it could be faster to invalidate all the code */
1135    tb = p->first_tb;
1136    while (tb != NULL) {
1137        n = (uintptr_t)tb & 3;
1138        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1139        tb_next = tb->page_next[n];
1140        /* NOTE: this is subtle as a TB may span two physical pages */
1141        if (n == 0) {
1142            /* NOTE: tb_end may be after the end of the page, but
1143               it is not a problem */
1144            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1145            tb_end = tb_start + tb->size;
1146        } else {
1147            tb_start = tb->page_addr[1];
1148            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1149        }
1150        if (!(tb_end <= start || tb_start >= end)) {
1151#ifdef TARGET_HAS_PRECISE_SMC
1152            if (current_tb_not_found) {
1153                current_tb_not_found = 0;
1154                current_tb = NULL;
1155                if (env->mem_io_pc) {
1156                    /* now we have a real cpu fault */
1157                    current_tb = tb_find_pc(env->mem_io_pc);
1158                }
1159            }
1160            if (current_tb == tb &&
1161                (current_tb->cflags & CF_COUNT_MASK) != 1) {
1162                /* If we are modifying the current TB, we must stop
1163                its execution. We could be more precise by checking
1164                that the modification is after the current PC, but it
1165                would require a specialized function to partially
1166                restore the CPU state */
1167
1168                current_tb_modified = 1;
1169                cpu_restore_state(current_tb, env, env->mem_io_pc);
1170                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1171                                     &current_flags);
1172            }
1173#endif /* TARGET_HAS_PRECISE_SMC */
1174            /* we need to do that to handle the case where a signal
1175               occurs while doing tb_phys_invalidate() */
1176            saved_tb = NULL;
1177            if (env) {
1178                saved_tb = env->current_tb;
1179                env->current_tb = NULL;
1180            }
1181            tb_phys_invalidate(tb, -1);
1182            if (env) {
1183                env->current_tb = saved_tb;
1184                if (env->interrupt_request && env->current_tb)
1185                    cpu_interrupt(env, env->interrupt_request);
1186            }
1187        }
1188        tb = tb_next;
1189    }
1190#if !defined(CONFIG_USER_ONLY)
1191    /* if no code remaining, no need to continue to use slow writes */
1192    if (!p->first_tb) {
1193        invalidate_page_bitmap(p);
1194        if (is_cpu_write_access) {
1195            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1196        }
1197    }
1198#endif
1199#ifdef TARGET_HAS_PRECISE_SMC
1200    if (current_tb_modified) {
1201        /* we generate a block containing just the instruction
1202           modifying the memory. It will ensure that it cannot modify
1203           itself */
1204        env->current_tb = NULL;
1205        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1206        cpu_resume_from_signal(env, NULL);
1207    }
1208#endif
1209}
1210
1211/* len must be <= 8 and start must be a multiple of len */
1212static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1213{
1214    PageDesc *p;
1215    int offset, b;
1216#if 0
1217    if (1) {
1218        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1219                  cpu_single_env->mem_io_vaddr, len,
1220                  cpu_single_env->eip,
1221                  cpu_single_env->eip +
1222                  (intptr_t)cpu_single_env->segs[R_CS].base);
1223    }
1224#endif
1225    p = page_find(start >> TARGET_PAGE_BITS);
1226    if (!p)
1227        return;
1228    if (p->code_bitmap) {
1229        offset = start & ~TARGET_PAGE_MASK;
1230        b = p->code_bitmap[offset >> 3] >> (offset & 7);
1231        if (b & ((1 << len) - 1))
1232            goto do_invalidate;
1233    } else {
1234    do_invalidate:
1235        tb_invalidate_phys_page_range(start, start + len, 1);
1236    }
1237}
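
/* Example (illustrative): a 4-byte write at page offset 0x120 reads
   code_bitmap[0x24], shifts it right by (0x120 & 7) == 0 and tests the low
   four bits, i.e. exactly the bits for offsets 0x120..0x123.  For the
   power-of-two lengths used by the write paths, those bits never straddle a
   byte, so one array read suffices.  */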
1238
1239#if !defined(CONFIG_SOFTMMU)
1240static void tb_invalidate_phys_page(tb_page_addr_t addr,
1241                                    uintptr_t pc, void *puc)
1242{
1243    TranslationBlock *tb;
1244    PageDesc *p;
1245    int n;
1246#ifdef TARGET_HAS_PRECISE_SMC
1247    TranslationBlock *current_tb = NULL;
1248    CPUArchState *env = cpu_single_env;
1249    int current_tb_modified = 0;
1250    target_ulong current_pc = 0;
1251    target_ulong current_cs_base = 0;
1252    int current_flags = 0;
1253#endif
1254
1255    addr &= TARGET_PAGE_MASK;
1256    p = page_find(addr >> TARGET_PAGE_BITS);
1257    if (!p)
1258        return;
1259    tb = p->first_tb;
1260#ifdef TARGET_HAS_PRECISE_SMC
1261    if (tb && pc != 0) {
1262        current_tb = tb_find_pc(pc);
1263    }
1264#endif
1265    while (tb != NULL) {
1266        n = (uintptr_t)tb & 3;
1267        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1268#ifdef TARGET_HAS_PRECISE_SMC
1269        if (current_tb == tb &&
1270            (current_tb->cflags & CF_COUNT_MASK) != 1) {
1271                /* If we are modifying the current TB, we must stop
1272                   its execution. We could be more precise by checking
1273                   that the modification is after the current PC, but it
1274                   would require a specialized function to partially
1275                   restore the CPU state */
1276
1277            current_tb_modified = 1;
1278            cpu_restore_state(current_tb, env, pc);
1279            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1280                                 &current_flags);
1281        }
1282#endif /* TARGET_HAS_PRECISE_SMC */
1283        tb_phys_invalidate(tb, addr);
1284        tb = tb->page_next[n];
1285    }
1286    p->first_tb = NULL;
1287#ifdef TARGET_HAS_PRECISE_SMC
1288    if (current_tb_modified) {
1289        /* we generate a block containing just the instruction
1290           modifying the memory. It will ensure that it cannot modify
1291           itself */
1292        env->current_tb = NULL;
1293        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1294        cpu_resume_from_signal(env, puc);
1295    }
1296#endif
1297}
1298#endif
1299
1300/* add the tb in the target page and protect it if necessary */
1301static inline void tb_alloc_page(TranslationBlock *tb,
1302                                 unsigned int n, tb_page_addr_t page_addr)
1303{
1304    PageDesc *p;
1305#ifndef CONFIG_USER_ONLY
1306    bool page_already_protected;
1307#endif
1308
1309    tb->page_addr[n] = page_addr;
1310    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1311    tb->page_next[n] = p->first_tb;
1312#ifndef CONFIG_USER_ONLY
1313    page_already_protected = p->first_tb != NULL;
1314#endif
1315    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1316    invalidate_page_bitmap(p);
1317
1318#if defined(TARGET_HAS_SMC) || 1
1319
1320#if defined(CONFIG_USER_ONLY)
1321    if (p->flags & PAGE_WRITE) {
1322        target_ulong addr;
1323        PageDesc *p2;
1324        int prot;
1325
 1326        /* force the host page as non-writable (writes will have a
1327           page fault + mprotect overhead) */
1328        page_addr &= qemu_host_page_mask;
1329        prot = 0;
1330        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1331            addr += TARGET_PAGE_SIZE) {
1332
1333            p2 = page_find (addr >> TARGET_PAGE_BITS);
1334            if (!p2)
1335                continue;
1336            prot |= p2->flags;
1337            p2->flags &= ~PAGE_WRITE;
1338          }
1339        mprotect(g2h(page_addr), qemu_host_page_size,
1340                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1341#ifdef DEBUG_TB_INVALIDATE
1342        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1343               page_addr);
1344#endif
1345    }
1346#else
1347    /* if some code is already present, then the pages are already
1348       protected. So we handle the case where only the first TB is
1349       allocated in a physical page */
1350    if (!page_already_protected) {
1351        tlb_protect_code(page_addr);
1352    }
1353#endif
1354
1355#endif /* TARGET_HAS_SMC */
1356}
1357
1358/* add a new TB and link it to the physical page tables. phys_page2 is
1359   (-1) to indicate that only one page contains the TB. */
1360static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1361                         tb_page_addr_t phys_page2)
1362{
1363    unsigned int h;
1364    TranslationBlock **ptb;
1365
1366    /* Grab the mmap lock to stop another thread invalidating this TB
1367       before we are done.  */
1368    mmap_lock();
1369    /* add in the physical hash table */
1370    h = tb_phys_hash_func(phys_pc);
1371    ptb = &tb_phys_hash[h];
1372    tb->phys_hash_next = *ptb;
1373    *ptb = tb;
1374
1375    /* add in the page list */
1376    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1377    if (phys_page2 != -1)
1378        tb_alloc_page(tb, 1, phys_page2);
1379    else
1380        tb->page_addr[1] = -1;
1381
1382    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1383    tb->jmp_next[0] = NULL;
1384    tb->jmp_next[1] = NULL;
1385
1386    /* init original jump addresses */
1387    if (tb->tb_next_offset[0] != 0xffff)
1388        tb_reset_jump(tb, 0);
1389    if (tb->tb_next_offset[1] != 0xffff)
1390        tb_reset_jump(tb, 1);
1391
1392#ifdef DEBUG_TB_CHECK
1393    tb_page_check();
1394#endif
1395    mmap_unlock();
1396}
1397
1398#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
1399/* check whether the given addr is in TCG generated code buffer or not */
1400bool is_tcg_gen_code(uintptr_t tc_ptr)
1401{
 1402    /* This can be called during code generation; code_gen_buffer_max_size
1403       is used instead of code_gen_ptr for upper boundary checking */
1404    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
1405            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
1406}
1407#endif
1408
1409/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1410   tb[1].tc_ptr. Return NULL if not found */
1411TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1412{
1413    int m_min, m_max, m;
1414    uintptr_t v;
1415    TranslationBlock *tb;
1416
1417    if (nb_tbs <= 0)
1418        return NULL;
1419    if (tc_ptr < (uintptr_t)code_gen_buffer ||
1420        tc_ptr >= (uintptr_t)code_gen_ptr) {
1421        return NULL;
1422    }
1423    /* binary search (cf Knuth) */
1424    m_min = 0;
1425    m_max = nb_tbs - 1;
1426    while (m_min <= m_max) {
1427        m = (m_min + m_max) >> 1;
1428        tb = &tbs[m];
1429        v = (uintptr_t)tb->tc_ptr;
1430        if (v == tc_ptr)
1431            return tb;
1432        else if (tc_ptr < v) {
1433            m_max = m - 1;
1434        } else {
1435            m_min = m + 1;
1436        }
1437    }
1438    return &tbs[m_max];
1439}
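
/* The binary search above relies on tbs[] being sorted by tc_ptr, which
   holds because translated code is handed out linearly from code_gen_ptr.
   When there is no exact match, tbs[m_max] is the last block starting at or
   before tc_ptr, i.e. the only candidate that can contain it.  */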
1440
1441static void tb_reset_jump_recursive(TranslationBlock *tb);
1442
1443static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1444{
1445    TranslationBlock *tb1, *tb_next, **ptb;
1446    unsigned int n1;
1447
1448    tb1 = tb->jmp_next[n];
1449    if (tb1 != NULL) {
1450        /* find head of list */
1451        for(;;) {
1452            n1 = (uintptr_t)tb1 & 3;
1453            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1454            if (n1 == 2)
1455                break;
1456            tb1 = tb1->jmp_next[n1];
1457        }
 1458        /* we are now sure that tb jumps to tb1 */
1459        tb_next = tb1;
1460
1461        /* remove tb from the jmp_first list */
1462        ptb = &tb_next->jmp_first;
1463        for(;;) {
1464            tb1 = *ptb;
1465            n1 = (uintptr_t)tb1 & 3;
1466            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1467            if (n1 == n && tb1 == tb)
1468                break;
1469            ptb = &tb1->jmp_next[n1];
1470        }
1471        *ptb = tb->jmp_next[n];
1472        tb->jmp_next[n] = NULL;
1473
1474        /* suppress the jump to next tb in generated code */
1475        tb_reset_jump(tb, n);
1476
1477        /* suppress jumps in the tb on which we could have jumped */
1478        tb_reset_jump_recursive(tb_next);
1479    }
1480}
1481
1482static void tb_reset_jump_recursive(TranslationBlock *tb)
1483{
1484    tb_reset_jump_recursive2(tb, 0);
1485    tb_reset_jump_recursive2(tb, 1);
1486}
1487
1488#if defined(TARGET_HAS_ICE)
1489#if defined(CONFIG_USER_ONLY)
1490static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1491{
1492    tb_invalidate_phys_page_range(pc, pc + 1, 0);
1493}
1494#else
1495void tb_invalidate_phys_addr(hwaddr addr)
1496{
1497    ram_addr_t ram_addr;
1498    MemoryRegionSection *section;
1499
1500    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1501    if (!(memory_region_is_ram(section->mr)
1502          || (section->mr->rom_device && section->mr->readable))) {
1503        return;
1504    }
1505    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1506        + memory_region_section_addr(section, addr);
1507    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1508}
1509
1510static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1511{
1512    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1513            (pc & ~TARGET_PAGE_MASK));
1514}
1515#endif
1516#endif /* TARGET_HAS_ICE */
1517
1518#if defined(CONFIG_USER_ONLY)
1519void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1520
1521{
1522}
1523
1524int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1525                          int flags, CPUWatchpoint **watchpoint)
1526{
1527    return -ENOSYS;
1528}
1529#else
1530/* Add a watchpoint.  */
1531int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1532                          int flags, CPUWatchpoint **watchpoint)
1533{
1534    target_ulong len_mask = ~(len - 1);
1535    CPUWatchpoint *wp;
1536
1537    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1538    if ((len & (len - 1)) || (addr & ~len_mask) ||
1539            len == 0 || len > TARGET_PAGE_SIZE) {
1540        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1541                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1542        return -EINVAL;
1543    }
1544    wp = g_malloc(sizeof(*wp));
1545
1546    wp->vaddr = addr;
1547    wp->len_mask = len_mask;
1548    wp->flags = flags;
1549
1550    /* keep all GDB-injected watchpoints in front */
1551    if (flags & BP_GDB)
1552        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1553    else
1554        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1555
1556    tlb_flush_page(env, addr);
1557
1558    if (watchpoint)
1559        *watchpoint = wp;
1560    return 0;
1561}
1562
1563/* Remove a specific watchpoint.  */
1564int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1565                          int flags)
1566{
1567    target_ulong len_mask = ~(len - 1);
1568    CPUWatchpoint *wp;
1569
1570    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1571        if (addr == wp->vaddr && len_mask == wp->len_mask
1572                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1573            cpu_watchpoint_remove_by_ref(env, wp);
1574            return 0;
1575        }
1576    }
1577    return -ENOENT;
1578}
1579
1580/* Remove a specific watchpoint by reference.  */
1581void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1582{
1583    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1584
1585    tlb_flush_page(env, watchpoint->vaddr);
1586
1587    g_free(watchpoint);
1588}
1589
1590/* Remove all matching watchpoints.  */
1591void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1592{
1593    CPUWatchpoint *wp, *next;
1594
1595    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1596        if (wp->flags & mask)
1597            cpu_watchpoint_remove_by_ref(env, wp);
1598    }
1599}
1600#endif
1601
1602/* Add a breakpoint.  */
1603int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1604                          CPUBreakpoint **breakpoint)
1605{
1606#if defined(TARGET_HAS_ICE)
1607    CPUBreakpoint *bp;
1608
1609    bp = g_malloc(sizeof(*bp));
1610
1611    bp->pc = pc;
1612    bp->flags = flags;
1613
1614    /* keep all GDB-injected breakpoints in front */
1615    if (flags & BP_GDB)
1616        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1617    else
1618        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1619
1620    breakpoint_invalidate(env, pc);
1621
1622    if (breakpoint)
1623        *breakpoint = bp;
1624    return 0;
1625#else
1626    return -ENOSYS;
1627#endif
1628}
1629
1630/* Remove a specific breakpoint.  */
1631int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1632{
1633#if defined(TARGET_HAS_ICE)
1634    CPUBreakpoint *bp;
1635
1636    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1637        if (bp->pc == pc && bp->flags == flags) {
1638            cpu_breakpoint_remove_by_ref(env, bp);
1639            return 0;
1640        }
1641    }
1642    return -ENOENT;
1643#else
1644    return -ENOSYS;
1645#endif
1646}
1647
1648/* Remove a specific breakpoint by reference.  */
1649void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1650{
1651#if defined(TARGET_HAS_ICE)
1652    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1653
1654    breakpoint_invalidate(env, breakpoint->pc);
1655
1656    g_free(breakpoint);
1657#endif
1658}
1659
1660/* Remove all matching breakpoints. */
1661void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1662{
1663#if defined(TARGET_HAS_ICE)
1664    CPUBreakpoint *bp, *next;
1665
1666    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1667        if (bp->flags & mask)
1668            cpu_breakpoint_remove_by_ref(env, bp);
1669    }
1670#endif
1671}
1672
1673/* enable or disable single step mode. EXCP_DEBUG is returned by the
1674   CPU loop after each instruction */
1675void cpu_single_step(CPUArchState *env, int enabled)
1676{
1677#if defined(TARGET_HAS_ICE)
1678    if (env->singlestep_enabled != enabled) {
1679        env->singlestep_enabled = enabled;
1680        if (kvm_enabled())
1681            kvm_update_guest_debug(env, 0);
1682        else {
1683            /* must flush all the translated code to avoid inconsistencies */
1684            /* XXX: only flush what is necessary */
1685            tb_flush(env);
1686        }
1687    }
1688#endif
1689}
1690
1691static void cpu_unlink_tb(CPUArchState *env)
1692{
1693    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
1694       problem and hope the cpu will stop of its own accord.  For userspace
1695       emulation this often isn't actually as bad as it sounds.  Often
1696       signals are used primarily to interrupt blocking syscalls.  */
1697    TranslationBlock *tb;
1698    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1699
1700    spin_lock(&interrupt_lock);
1701    tb = env->current_tb;
1702    /* if the cpu is currently executing code, we must unlink it and
1703       all the potentially executing TB */
1704    if (tb) {
1705        env->current_tb = NULL;
1706        tb_reset_jump_recursive(tb);
1707    }
1708    spin_unlock(&interrupt_lock);
1709}
1710
1711#ifndef CONFIG_USER_ONLY
1712/* mask must never be zero, except for A20 change call */
1713static void tcg_handle_interrupt(CPUArchState *env, int mask)
1714{
1715    CPUState *cpu = ENV_GET_CPU(env);
1716    int old_mask;
1717
1718    old_mask = env->interrupt_request;
1719    env->interrupt_request |= mask;
1720
1721    /*
1722     * If called from iothread context, wake the target cpu in
1723     * case it's halted.
1724     */
1725    if (!qemu_cpu_is_self(cpu)) {
1726        qemu_cpu_kick(cpu);
1727        return;
1728    }
1729
1730    if (use_icount) {
1731        env->icount_decr.u16.high = 0xffff;
1732        if (!can_do_io(env)
1733            && (mask & ~old_mask) != 0) {
1734            cpu_abort(env, "Raised interrupt while not in I/O function");
1735        }
1736    } else {
1737        cpu_unlink_tb(env);
1738    }
1739}
1740
1741CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1742
1743#else /* CONFIG_USER_ONLY */
1744
1745void cpu_interrupt(CPUArchState *env, int mask)
1746{
1747    env->interrupt_request |= mask;
1748    cpu_unlink_tb(env);
1749}
1750#endif /* CONFIG_USER_ONLY */
1751
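/* Clear the given bits from the CPU's pending interrupt mask.  */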
1752void cpu_reset_interrupt(CPUArchState *env, int mask)
1753{
1754    env->interrupt_request &= ~mask;
1755}
1756
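/* Request that the CPU leave its execution loop as soon as possible.  */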
1757void cpu_exit(CPUArchState *env)
1758{
1759    env->exit_request = 1;
1760    cpu_unlink_tb(env);
1761}
1762
1763void cpu_abort(CPUArchState *env, const char *fmt, ...)
1764{
1765    va_list ap;
1766    va_list ap2;
1767
1768    va_start(ap, fmt);
1769    va_copy(ap2, ap);
1770    fprintf(stderr, "qemu: fatal: ");
1771    vfprintf(stderr, fmt, ap);
1772    fprintf(stderr, "\n");
1773    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1774    if (qemu_log_enabled()) {
1775        qemu_log("qemu: fatal: ");
1776        qemu_log_vprintf(fmt, ap2);
1777        qemu_log("\n");
1778        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1779        qemu_log_flush();
1780        qemu_log_close();
1781    }
1782    va_end(ap2);
1783    va_end(ap);
1784#if defined(CONFIG_USER_ONLY)
1785    {
1786        struct sigaction act;
1787        sigfillset(&act.sa_mask);
1788        act.sa_handler = SIG_DFL;
1789        sigaction(SIGABRT, &act, NULL);
1790    }
1791#endif
1792    abort();
1793}
1794
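/* Duplicate 'env' (used by the user-mode emulators when a new thread is
   cloned), preserving the CPU chaining and index and re-inserting the debug
   breakpoints and watchpoints into the copy.  */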
1795CPUArchState *cpu_copy(CPUArchState *env)
1796{
1797    CPUArchState *new_env = cpu_init(env->cpu_model_str);
1798    CPUArchState *next_cpu = new_env->next_cpu;
1799    int cpu_index = new_env->cpu_index;
1800#if defined(TARGET_HAS_ICE)
1801    CPUBreakpoint *bp;
1802    CPUWatchpoint *wp;
1803#endif
1804
1805    memcpy(new_env, env, sizeof(CPUArchState));
1806
1807    /* Preserve chaining and index. */
1808    new_env->next_cpu = next_cpu;
1809    new_env->cpu_index = cpu_index;
1810
1811    /* Clone all break/watchpoints.
1812       Note: Once we support ptrace with hw-debug register access, make sure
1813       BP_CPU break/watchpoints are handled correctly on clone. */
1814    QTAILQ_INIT(&new_env->breakpoints);
1815    QTAILQ_INIT(&new_env->watchpoints);
1816#if defined(TARGET_HAS_ICE)
1817    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1818        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1819    }
1820    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1821        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1822                              wp->flags, NULL);
1823    }
1824#endif
1825
1826    return new_env;
1827}
1828
1829#if !defined(CONFIG_USER_ONLY)
1830void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1831{
1832    unsigned int i;
1833
1834    /* Discard jump cache entries for any tb which might potentially
1835       overlap the flushed page.  A tb may straddle two pages, so the
           entries hashed under the preceding page are cleared as well.  */
1836    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1837    memset(&env->tb_jmp_cache[i], 0,
1838           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1839
1840    i = tb_jmp_cache_hash_page(addr);
1841    memset(&env->tb_jmp_cache[i], 0,
1842           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1843}
1844
1845static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1846                                      uintptr_t length)
1847{
1848    uintptr_t start1;
1849
1850    /* we modify the TLB cache so that the dirty bit will be set again
1851       when accessing the range */
1852    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1853    /* Check that we don't span multiple blocks - this breaks the
1854       address comparisons below.  */
1855    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1856            != (end - 1) - start) {
1857        abort();
1858    }
1859    cpu_tlb_reset_dirty_all(start1, length);
1860
1861}
1862
1863/* Note: start and end must be within the same ram block.  */
1864void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1865                                     int dirty_flags)
1866{
1867    uintptr_t length;
1868
1869    start &= TARGET_PAGE_MASK;
1870    end = TARGET_PAGE_ALIGN(end);
1871
1872    length = end - start;
1873    if (length == 0)
1874        return;
1875    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1876
1877    if (tcg_enabled()) {
1878        tlb_reset_dirty_range_all(start, end, length);
1879    }
1880}
1881
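/* Called from the core memory listener's log_global_start/stop hooks; just
   records whether dirty-page tracking (migration) is in progress.  */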
1882static int cpu_physical_memory_set_dirty_tracking(int enable)
1883{
1884    int ret = 0;
1885    in_migration = enable;
1886    return ret;
1887}
1888
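/* Compute the value to store in a TLB entry's iotlb field: for RAM this is
   the ram address combined with the notdirty or rom section index, for I/O
   it is the index of the MemoryRegionSection plus the offset within it.
   Pages containing watchpoints are routed through the watch section and
   marked TLB_MMIO.  */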
1889hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1890                                                   MemoryRegionSection *section,
1891                                                   target_ulong vaddr,
1892                                                   hwaddr paddr,
1893                                                   int prot,
1894                                                   target_ulong *address)
1895{
1896    hwaddr iotlb;
1897    CPUWatchpoint *wp;
1898
1899    if (memory_region_is_ram(section->mr)) {
1900        /* Normal RAM.  */
1901        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1902            + memory_region_section_addr(section, paddr);
1903        if (!section->readonly) {
1904            iotlb |= phys_section_notdirty;
1905        } else {
1906            iotlb |= phys_section_rom;
1907        }
1908    } else {
1909        /* IO handlers are currently passed a physical address.
1910           It would be nice to pass an offset from the base address
1911           of that region.  This would avoid having to special case RAM,
1912           and avoid full address decoding in every device.
1913           We can't use the high bits of pd for this because
1914           IO_MEM_ROMD uses these as a ram address.  */
1915        iotlb = section - phys_sections;
1916        iotlb += memory_region_section_addr(section, paddr);
1917    }
1918
1919    /* Make accesses to pages with watchpoints go via the
1920       watchpoint trap routines.  */
1921    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1922        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1923            /* Avoid trapping reads of pages with a write breakpoint. */
1924            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1925                iotlb = phys_section_watch + paddr;
1926                *address |= TLB_MMIO;
1927                break;
1928            }
1929        }
1930    }
1931
1932    return iotlb;
1933}
1934
1935#else
1936/*
1937 * Walks guest process memory "regions" one by one
1938 * and calls callback function 'fn' for each region.
1939 */
1940
1941struct walk_memory_regions_data
1942{
1943    walk_memory_regions_fn fn;
1944    void *priv;
1945    uintptr_t start;
1946    int prot;
1947};
1948
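/* Flush the region accumulated so far in 'data' (if any) to the callback,
   then start a new region at 'end' with protection 'new_prot', or no region
   at all if new_prot is zero.  */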
1949static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1950                                   abi_ulong end, int new_prot)
1951{
1952    if (data->start != -1ul) {
1953        int rc = data->fn(data->priv, data->start, end, data->prot);
1954        if (rc != 0) {
1955            return rc;
1956        }
1957    }
1958
1959    data->start = (new_prot ? end : -1ul);
1960    data->prot = new_prot;
1961
1962    return 0;
1963}
1964
1965static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1966                                 abi_ulong base, int level, void **lp)
1967{
1968    abi_ulong pa;
1969    int i, rc;
1970
1971    if (*lp == NULL) {
1972        return walk_memory_regions_end(data, base, 0);
1973    }
1974
1975    if (level == 0) {
1976        PageDesc *pd = *lp;
1977        for (i = 0; i < L2_SIZE; ++i) {
1978            int prot = pd[i].flags;
1979
1980            pa = base | (i << TARGET_PAGE_BITS);
1981            if (prot != data->prot) {
1982                rc = walk_memory_regions_end(data, pa, prot);
1983                if (rc != 0) {
1984                    return rc;
1985                }
1986            }
1987        }
1988    } else {
1989        void **pp = *lp;
1990        for (i = 0; i < L2_SIZE; ++i) {
1991            pa = base | ((abi_ulong)i <<
1992                (TARGET_PAGE_BITS + L2_BITS * level));
1993            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1994            if (rc != 0) {
1995                return rc;
1996            }
1997        }
1998    }
1999
2000    return 0;
2001}
2002
2003int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2004{
2005    struct walk_memory_regions_data data;
2006    uintptr_t i;
2007
2008    data.fn = fn;
2009    data.priv = priv;
2010    data.start = -1ul;
2011    data.prot = 0;
2012
2013    for (i = 0; i < V_L1_SIZE; i++) {
2014        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2015                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2016        if (rc != 0) {
2017            return rc;
2018        }
2019    }
2020
2021    return walk_memory_regions_end(&data, 0, 0);
2022}
2023
2024static int dump_region(void *priv, abi_ulong start,
2025    abi_ulong end, unsigned long prot)
2026{
2027    FILE *f = (FILE *)priv;
2028
2029    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2030        " "TARGET_ABI_FMT_lx" %c%c%c\n",
2031        start, end, end - start,
2032        ((prot & PAGE_READ) ? 'r' : '-'),
2033        ((prot & PAGE_WRITE) ? 'w' : '-'),
2034        ((prot & PAGE_EXEC) ? 'x' : '-'));
2035
2036    return 0;
2037}
2038
2039/* dump memory mappings */
2040void page_dump(FILE *f)
2041{
2042    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2043            "start", "end", "size", "prot");
2044    walk_memory_regions(f, dump_region);
2045}
2046
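/* Return the flags of the page containing 'address', or 0 if it is not
   mapped.  */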
2047int page_get_flags(target_ulong address)
2048{
2049    PageDesc *p;
2050
2051    p = page_find(address >> TARGET_PAGE_BITS);
2052    if (!p)
2053        return 0;
2054    return p->flags;
2055}
2056
2057/* Modify the flags of a page and invalidate the code if necessary.
2058   The flag PAGE_WRITE_ORG is positioned automatically depending
2059   on PAGE_WRITE.  The mmap_lock should already be held.  */
2060void page_set_flags(target_ulong start, target_ulong end, int flags)
2061{
2062    target_ulong addr, len;
2063
2064    /* This function should never be called with addresses outside the
2065       guest address space.  If this assert fires, it probably indicates
2066       a missing call to h2g_valid.  */
2067#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2068    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2069#endif
2070    assert(start < end);
2071
2072    start = start & TARGET_PAGE_MASK;
2073    end = TARGET_PAGE_ALIGN(end);
2074
2075    if (flags & PAGE_WRITE) {
2076        flags |= PAGE_WRITE_ORG;
2077    }
2078
2079    for (addr = start, len = end - start;
2080         len != 0;
2081         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2082        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2083
2084        /* If the write protection bit is set, then we invalidate
2085           the code inside.  */
2086        if (!(p->flags & PAGE_WRITE) &&
2087            (flags & PAGE_WRITE) &&
2088            p->first_tb) {
2089            tb_invalidate_phys_page(addr, 0, NULL);
2090        }
2091        p->flags = flags;
2092    }
2093}
2094
2095int page_check_range(target_ulong start, target_ulong len, int flags)
2096{
2097    PageDesc *p;
2098    target_ulong end;
2099    target_ulong addr;
2100
2101    /* This function should never be called with addresses outside the
2102       guest address space.  If this assert fires, it probably indicates
2103       a missing call to h2g_valid.  */
2104#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2105    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2106#endif
2107
2108    if (len == 0) {
2109        return 0;
2110    }
2111    if (start + len - 1 < start) {
2112        /* We've wrapped around.  */
2113        return -1;
2114    }
2115
2116    end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the masking below */
2117    start = start & TARGET_PAGE_MASK;
2118
2119    for (addr = start, len = end - start;
2120         len != 0;
2121         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2122        p = page_find(addr >> TARGET_PAGE_BITS);
2123        if (!p)
2124            return -1;
2125        if (!(p->flags & PAGE_VALID))
2126            return -1;
2127
2128        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2129            return -1;
2130        if (flags & PAGE_WRITE) {
2131            if (!(p->flags & PAGE_WRITE_ORG))
2132                return -1;
2133            /* unprotect the page if it was put read-only because it
2134               contains translated code */
2135            if (!(p->flags & PAGE_WRITE)) {
2136                if (!page_unprotect(addr, 0, NULL))
2137                    return -1;
2138            }
2139            return 0;
2140        }
2141    }
2142    return 0;
2143}
2144
2145/* called from signal handler: invalidate the code and unprotect the
2146   page. Return TRUE if the fault was successfully handled. */
2147int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2148{
2149    unsigned int prot;
2150    PageDesc *p;
2151    target_ulong host_start, host_end, addr;
2152
2153    /* Technically this isn't safe inside a signal handler.  However we
2154       know this only ever happens in a synchronous SEGV handler, so in
2155       practice it seems to be ok.  */
2156    mmap_lock();
2157
2158    p = page_find(address >> TARGET_PAGE_BITS);
2159    if (!p) {
2160        mmap_unlock();
2161        return 0;
2162    }
2163
2164    /* if the page was really writable, then we change its
2165       protection back to writable */
2166    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2167        host_start = address & qemu_host_page_mask;
2168        host_end = host_start + qemu_host_page_size;
2169
2170        prot = 0;
2171        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2172            p = page_find(addr >> TARGET_PAGE_BITS);
2173            p->flags |= PAGE_WRITE;
2174            prot |= p->flags;
2175
2176            /* and since the content will be modified, we must invalidate
2177               the corresponding translated code. */
2178            tb_invalidate_phys_page(addr, pc, puc);
2179#ifdef DEBUG_TB_CHECK
2180            tb_invalidate_check(addr);
2181#endif
2182        }
2183        mprotect((void *)g2h(host_start), qemu_host_page_size,
2184                 prot & PAGE_BITS);
2185
2186        mmap_unlock();
2187        return 1;
2188    }
2189    mmap_unlock();
2190    return 0;
2191}
2192#endif /* defined(CONFIG_USER_ONLY) */
2193
2194#if !defined(CONFIG_USER_ONLY)
2195
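/* A subpage splits a single target page into byte-granular ranges, each
   dispatched to its own MemoryRegionSection; sub_section[] holds one section
   index per byte offset within the page.  */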
2196#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2197typedef struct subpage_t {
2198    MemoryRegion iomem;
2199    hwaddr base;
2200    uint16_t sub_section[TARGET_PAGE_SIZE];
2201} subpage_t;
2202
2203static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2204                             uint16_t section);
2205static subpage_t *subpage_init(hwaddr base);
2206static void destroy_page_desc(uint16_t section_index)
2207{
2208    MemoryRegionSection *section = &phys_sections[section_index];
2209    MemoryRegion *mr = section->mr;
2210
2211    if (mr->subpage) {
2212        subpage_t *subpage = container_of(mr, subpage_t, iomem);
2213        memory_region_destroy(&subpage->iomem);
2214        g_free(subpage);
2215    }
2216}
2217
2218static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2219{
2220    unsigned i;
2221    PhysPageEntry *p;
2222
2223    if (lp->ptr == PHYS_MAP_NODE_NIL) {
2224        return;
2225    }
2226
2227    p = phys_map_nodes[lp->ptr];
2228    for (i = 0; i < L2_SIZE; ++i) {
2229        if (!p[i].is_leaf) {
2230            destroy_l2_mapping(&p[i], level - 1);
2231        } else {
2232            destroy_page_desc(p[i].ptr);
2233        }
2234    }
2235    lp->is_leaf = 0;
2236    lp->ptr = PHYS_MAP_NODE_NIL;
2237}
2238
2239static void destroy_all_mappings(AddressSpaceDispatch *d)
2240{
2241    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2242    phys_map_nodes_reset();
2243}
2244
2245static uint16_t phys_section_add(MemoryRegionSection *section)
2246{
2247    if (phys_sections_nb == phys_sections_nb_alloc) {
2248        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2249        phys_sections = g_renew(MemoryRegionSection, phys_sections,
2250                                phys_sections_nb_alloc);
2251    }
2252    phys_sections[phys_sections_nb] = *section;
2253    return phys_sections_nb++;
2254}
2255
2256static void phys_sections_clear(void)
2257{
2258    phys_sections_nb = 0;
2259}
2260
2261static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2262{
2263    subpage_t *subpage;
2264    hwaddr base = section->offset_within_address_space
2265        & TARGET_PAGE_MASK;
2266    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2267    MemoryRegionSection subsection = {
2268        .offset_within_address_space = base,
2269        .size = TARGET_PAGE_SIZE,
2270    };
2271    hwaddr start, end;
2272
2273    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2274
2275    if (!(existing->mr->subpage)) {
2276        subpage = subpage_init(base);
2277        subsection.mr = &subpage->iomem;
2278        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2279                      phys_section_add(&subsection));
2280    } else {
2281        subpage = container_of(existing->mr, subpage_t, iomem);
2282    }
2283    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2284    end = start + section->size - 1;
2285    subpage_register(subpage, start, end, phys_section_add(section));
2286}
2287
2288
2289static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2290{
2291    hwaddr start_addr = section->offset_within_address_space;
2292    ram_addr_t size = section->size;
2293    hwaddr addr;
2294    uint16_t section_index = phys_section_add(section);
2295
2296    assert(size);
2297
2298    addr = start_addr;
2299    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2300                  section_index);
2301}
2302
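/* Split an incoming section into page-aligned bulk pieces, registered with
   register_multipage(), and partial or misaligned pieces that need
   byte-granular dispatch, registered with register_subpage().  */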
2303static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2304{
2305    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2306    MemoryRegionSection now = *section, remain = *section;
2307
2308    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2309        || (now.size < TARGET_PAGE_SIZE)) {
2310        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2311                       - now.offset_within_address_space,
2312                       now.size);
2313        register_subpage(d, &now);
2314        remain.size -= now.size;
2315        remain.offset_within_address_space += now.size;
2316        remain.offset_within_region += now.size;
2317    }
2318    while (remain.size >= TARGET_PAGE_SIZE) {
2319        now = remain;
2320        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2321            now.size = TARGET_PAGE_SIZE;
2322            register_subpage(d, &now);
2323        } else {
2324            now.size &= TARGET_PAGE_MASK;
2325            register_multipage(d, &now);
2326        }
2327        remain.size -= now.size;
2328        remain.offset_within_address_space += now.size;
2329        remain.offset_within_region += now.size;
2330    }
2331    now = remain;
2332    if (now.size) {
2333        register_subpage(d, &now);
2334    }
2335}
2336
2337void qemu_flush_coalesced_mmio_buffer(void)
2338{
2339    if (kvm_enabled())
2340        kvm_flush_coalesced_mmio_buffer();
2341}
2342
2343#if defined(__linux__) && !defined(TARGET_S390X)
2344
2345#include <sys/vfs.h>
2346
2347#define HUGETLBFS_MAGIC       0x958458f6
2348
2349static long gethugepagesize(const char *path)
2350{
2351    struct statfs fs;
2352    int ret;
2353
2354    do {
2355        ret = statfs(path, &fs);
2356    } while (ret != 0 && errno == EINTR);
2357
2358    if (ret != 0) {
2359        perror(path);
2360        return 0;
2361    }
2362
2363    if (fs.f_type != HUGETLBFS_MAGIC)
2364        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2365
2366    return fs.f_bsize;
2367}
2368
2369static void *file_ram_alloc(RAMBlock *block,
2370                            ram_addr_t memory,
2371                            const char *path)
2372{
2373    char *filename;
2374    void *area;
2375    int fd;
2376#ifdef MAP_POPULATE
2377    int flags;
2378#endif
2379    unsigned long hpagesize;
2380
2381    hpagesize = gethugepagesize(path);
2382    if (!hpagesize) {
2383        return NULL;
2384    }
2385
2386    if (memory < hpagesize) {
2387        return NULL;
2388    }
2389
2390    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2391        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2392        return NULL;
2393    }
2394
2395    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2396        return NULL;
2397    }
2398
2399    fd = mkstemp(filename);
2400    if (fd < 0) {
2401        perror("unable to create backing store for hugepages");
2402        free(filename);
2403        return NULL;
2404    }
2405    unlink(filename);
2406    free(filename);
2407
2408    memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
2409
2410    /*
2411     * ftruncate is not supported by hugetlbfs in older
2412     * hosts, so don't bother bailing out on errors.
2413     * If anything goes wrong with it under other filesystems,
2414     * mmap will fail.
2415     */
2416    if (ftruncate(fd, memory))
2417        perror("ftruncate");
2418
2419#ifdef MAP_POPULATE
2420    /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2421     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2422     * to sidestep this quirk.
2423     */
2424    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2425    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2426#else
2427    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2428#endif
2429    if (area == MAP_FAILED) {
2430        perror("file_ram_alloc: can't mmap RAM pages");
2431        close(fd);
2432        return NULL;
2433    }
2434    block->fd = fd;
2435    return area;
2436}
2437#endif
2438
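/* Find the best-fit gap between the already registered RAM blocks that can
   hold 'size' bytes and return its start offset; abort if no gap is large
   enough.  */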
2439static ram_addr_t find_ram_offset(ram_addr_t size)
2440{
2441    RAMBlock *block, *next_block;
2442    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2443
2444    if (QLIST_EMPTY(&ram_list.blocks))
2445        return 0;
2446
2447    QLIST_FOREACH(block, &ram_list.blocks, next) {
2448        ram_addr_t end, next = RAM_ADDR_MAX;
2449
2450        end = block->offset + block->length;
2451
2452        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2453            if (next_block->offset >= end) {
2454                next = MIN(next, next_block->offset);
2455            }
2456        }
2457        if (next - end >= size && next - end < mingap) {
2458            offset = end;
2459            mingap = next - end;
2460        }
2461    }
2462
2463    if (offset == RAM_ADDR_MAX) {
2464        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2465                (uint64_t)size);
2466        abort();
2467    }
2468
2469    return offset;
2470}
2471
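/* Return the address just past the end of the highest registered RAM
   block.  */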
2472ram_addr_t last_ram_offset(void)
2473{
2474    RAMBlock *block;
2475    ram_addr_t last = 0;
2476
2477    QLIST_FOREACH(block, &ram_list.blocks, next)
2478        last = MAX(last, block->offset + block->length);
2479
2480    return last;
2481}
2482
2483static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2484{
2485    int ret;
2486    QemuOpts *machine_opts;
2487
2488    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2489    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2490    if (machine_opts &&
2491        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2492        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2493        if (ret) {
2494            perror("qemu_madvise");
2495            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2496                            "but dump_guest_core=off specified\n");
2497        }
2498    }
2499}
2500
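/* Build the block's identification string ("<device path>/<name>"), used
   among other things by RAM migration, and abort if it collides with an
   already registered block.  */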
2501void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2502{
2503    RAMBlock *new_block, *block;
2504
2505    new_block = NULL;
2506    QLIST_FOREACH(block, &ram_list.blocks, next) {
2507        if (block->offset == addr) {
2508            new_block = block;
2509            break;
2510        }
2511    }
2512    assert(new_block);
2513    assert(!new_block->idstr[0]);
2514
2515    if (dev) {
2516        char *id = qdev_get_dev_path(dev);
2517        if (id) {
2518            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2519            g_free(id);
2520        }
2521    }
2522    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2523
2524    QLIST_FOREACH(block, &ram_list.blocks, next) {
2525        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2526            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2527                    new_block->idstr);
2528            abort();
2529        }
2530    }
2531}
2532
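/* Advise the host kernel that this range is a candidate for page merging
   (KSM), unless the user disabled it with -machine mem-merge=off.  */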
2533static int memory_try_enable_merging(void *addr, size_t len)
2534{
2535    QemuOpts *opts;
2536
2537    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2538    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2539        /* disabled by the user */
2540        return 0;
2541    }
2542
2543    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2544}
2545
2546ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2547                                   MemoryRegion *mr)
2548{
2549    RAMBlock *new_block;
2550
2551    size = TARGET_PAGE_ALIGN(size);
2552    new_block = g_malloc0(sizeof(*new_block));
2553
2554    new_block->mr = mr;
2555    new_block->offset = find_ram_offset(size);
2556    if (host) {
2557        new_block->host = host;
2558        new_block->flags |= RAM_PREALLOC_MASK;
2559    } else {
2560        if (mem_path) {
2561#if defined (__linux__) && !defined(TARGET_S390X)
2562            new_block->host = file_ram_alloc(new_block, size, mem_path);
2563            if (!new_block->host) {
2564                new_block->host = qemu_vmalloc(size);
2565                memory_try_enable_merging(new_block->host, size);
2566            }
2567#else
2568            fprintf(stderr, "-mem-path option unsupported\n");
2569            exit(1);
2570#endif
2571        } else {
2572            if (xen_enabled()) {
2573                xen_ram_alloc(new_block->offset, size, mr);
2574            } else if (kvm_enabled()) {
2575                /* some s390/kvm configurations have special constraints */
2576                new_block->host = kvm_vmalloc(size);
2577            } else {
2578                new_block->host = qemu_vmalloc(size);
2579            }
2580            memory_try_enable_merging(new_block->host, size);
2581        }
2582    }
2583    new_block->length = size;
2584
2585    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2586
2587    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2588                                       last_ram_offset() >> TARGET_PAGE_BITS);
2589    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2590           0, size >> TARGET_PAGE_BITS);
2591    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2592
2593    qemu_ram_setup_dump(new_block->host, size);
2594    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2595
2596    if (kvm_enabled())
2597        kvm_setup_guest_memory(new_block->host, size);
2598
2599    return new_block->offset;
2600}
2601
2602ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2603{
2604    return qemu_ram_alloc_from_ptr(size, NULL, mr);
2605}
2606
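/* Unregister a RAM block whose memory was supplied by the caller; the
   backing storage itself is not freed here.  */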
2607void qemu_ram_free_from_ptr(ram_addr_t addr)
2608{
2609    RAMBlock *block;
2610
2611    QLIST_FOREACH(block, &ram_list.blocks, next) {
2612        if (addr == block->offset) {
2613            QLIST_REMOVE(block, next);
2614            g_free(block);
2615            return;
2616        }
2617    }
2618}
2619
2620void qemu_ram_free(ram_addr_t addr)
2621{
2622    RAMBlock *block;
2623
2624    QLIST_FOREACH(block, &ram_list.blocks, next) {
2625        if (addr == block->offset) {
2626            QLIST_REMOVE(block, next);
2627            if (block->flags & RAM_PREALLOC_MASK) {
2628                ;
2629            } else if (mem_path) {
2630#if defined (__linux__) && !defined(TARGET_S390X)
2631                if (block->fd) {
2632                    munmap(block->host, block->length);
2633                    close(block->fd);
2634                } else {
2635                    qemu_vfree(block->host);
2636                }
2637#else
2638                abort();
2639#endif
2640            } else {
2641#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2642                munmap(block->host, block->length);
2643#else
2644                if (xen_enabled()) {
2645                    xen_invalidate_map_cache_entry(block->host);
2646                } else {
2647                    qemu_vfree(block->host);
2648                }
2649#endif
2650            }
2651            g_free(block);
2652            return;
2653        }
2654    }
2655
2656}
2657
2658#ifndef _WIN32
2659void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2660{
2661    RAMBlock *block;
2662    ram_addr_t offset;
2663    int flags;
2664    void *area, *vaddr;
2665
2666    QLIST_FOREACH(block, &ram_list.blocks, next) {
2667        offset = addr - block->offset;
2668        if (offset < block->length) {
2669            vaddr = block->host + offset;
2670            if (block->flags & RAM_PREALLOC_MASK) {
2671                ;
2672            } else {
2673                flags = MAP_FIXED;
2674                munmap(vaddr, length);
2675                if (mem_path) {
2676#if defined(__linux__) && !defined(TARGET_S390X)
2677                    if (block->fd) {
2678#ifdef MAP_POPULATE
2679                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2680                            MAP_PRIVATE;
2681#else
2682                        flags |= MAP_PRIVATE;
2683#endif
2684                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2685                                    flags, block->fd, offset);
2686                    } else {
2687                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2688                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2689                                    flags, -1, 0);
2690                    }
2691#else
2692                    abort();
2693#endif
2694                } else {
2695#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2696                    flags |= MAP_SHARED | MAP_ANONYMOUS;
2697                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2698                                flags, -1, 0);
2699#else
2700                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2701                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2702                                flags, -1, 0);
2703#endif
2704                }
2705                if (area != vaddr) {
2706                    fprintf(stderr, "Could not remap addr: "
2707                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2708                            length, addr);
2709                    exit(1);
2710                }
2711                memory_try_enable_merging(vaddr, length);
2712                qemu_ram_setup_dump(vaddr, length);
2713            }
2714            return;
2715        }
2716    }
2717}
2718#endif /* !_WIN32 */
2719
2720/* Return a host pointer to ram allocated with qemu_ram_alloc.
2721   With the exception of the softmmu code in this file, this should
2722   only be used for local memory (e.g. video ram) that the device owns,
2723   and knows it isn't going to access beyond the end of the block.
2724
2725   It should not be used for general purpose DMA.
2726   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2727 */
2728void *qemu_get_ram_ptr(ram_addr_t addr)
2729{
2730    RAMBlock *block;
2731
2732    QLIST_FOREACH(block, &ram_list.blocks, next) {
2733        if (addr - block->offset < block->length) {
2734            /* Move this entry to the start of the list.  */
2735            if (block != QLIST_FIRST(&ram_list.blocks)) {
2736                QLIST_REMOVE(block, next);
2737                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2738            }
2739            if (xen_enabled()) {
2740                /* We need to check if the requested address is in the RAM
2741                 * because we don't want to map the entire memory in QEMU.
2742                 * In that case just map until the end of the page.
2743                 */
2744                if (block->offset == 0) {
2745                    return xen_map_cache(addr, 0, 0);
2746                } else if (block->host == NULL) {
2747                    block->host =
2748                        xen_map_cache(block->offset, block->length, 1);
2749                }
2750            }
2751            return block->host + (addr - block->offset);
2752        }
2753    }
2754
2755    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2756    abort();
2757
2758    return NULL;
2759}
2760
2761/* Return a host pointer to ram allocated with qemu_ram_alloc.
2762 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2763 */
2764static void *qemu_safe_ram_ptr(ram_addr_t addr)
2765{
2766    RAMBlock *block;
2767
2768    QLIST_FOREACH(block, &ram_list.blocks, next) {
2769        if (addr - block->offset < block->length) {
2770            if (xen_enabled()) {
2771                /* We need to check if the requested address is in the RAM
2772                 * because we don't want to map the entire memory in QEMU.
2773                 * In that case just map until the end of the page.
2774                 */
2775                if (block->offset == 0) {
2776                    return xen_map_cache(addr, 0, 0);
2777                } else if (block->host == NULL) {
2778                    block->host =
2779                        xen_map_cache(block->offset, block->length, 1);
2780                }
2781            }
2782            return block->host + (addr - block->offset);
2783        }
2784    }
2785
2786    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2787    abort();
2788
2789    return NULL;
2790}
2791
2792/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2793 * but takes a size argument */
2794static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2795{
2796    if (*size == 0) {
2797        return NULL;
2798    }
2799    if (xen_enabled()) {
2800        return xen_map_cache(addr, *size, 1);
2801    } else {
2802        RAMBlock *block;
2803
2804        QLIST_FOREACH(block, &ram_list.blocks, next) {
2805            if (addr - block->offset < block->length) {
2806                if (addr - block->offset + *size > block->length)
2807                    *size = block->length - addr + block->offset;
2808                return block->host + (addr - block->offset);
2809            }
2810        }
2811
2812        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2813        abort();
2814    }
2815}
2816
2817void qemu_put_ram_ptr(void *addr)
2818{
2819    trace_qemu_put_ram_ptr(addr);
2820}
2821
2822int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2823{
2824    RAMBlock *block;
2825    uint8_t *host = ptr;
2826
2827    if (xen_enabled()) {
2828        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2829        return 0;
2830    }
2831
2832    QLIST_FOREACH(block, &ram_list.blocks, next) {
2833        /* This case happens when the block is not mapped. */
2834        if (block->host == NULL) {
2835            continue;
2836        }
2837        if (host - block->host < block->length) {
2838            *ram_addr = block->offset + (host - block->host);
2839            return 0;
2840        }
2841    }
2842
2843    return -1;
2844}
2845
2846/* Some of the softmmu routines need to translate from a host pointer
2847   (typically a TLB entry) back to a ram offset.  */
2848ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2849{
2850    ram_addr_t ram_addr;
2851
2852    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2853        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2854        abort();
2855    }
2856    return ram_addr;
2857}
2858
2859static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2860                                    unsigned size)
2861{
2862#ifdef DEBUG_UNASSIGNED
2863    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2864#endif
2865#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2866    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2867#endif
2868    return 0;
2869}
2870
2871static void unassigned_mem_write(void *opaque, hwaddr addr,
2872                                 uint64_t val, unsigned size)
2873{
2874#ifdef DEBUG_UNASSIGNED
2875    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2876#endif
2877#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2878    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2879#endif
2880}
2881
2882static const MemoryRegionOps unassigned_mem_ops = {
2883    .read = unassigned_mem_read,
2884    .write = unassigned_mem_write,
2885    .endianness = DEVICE_NATIVE_ENDIAN,
2886};
2887
2888static uint64_t error_mem_read(void *opaque, hwaddr addr,
2889                               unsigned size)
2890{
2891    abort();
2892}
2893
2894static void error_mem_write(void *opaque, hwaddr addr,
2895                            uint64_t value, unsigned size)
2896{
2897    abort();
2898}
2899
2900static const MemoryRegionOps error_mem_ops = {
2901    .read = error_mem_read,
2902    .write = error_mem_write,
2903    .endianness = DEVICE_NATIVE_ENDIAN,
2904};
2905
2906static const MemoryRegionOps rom_mem_ops = {
2907    .read = error_mem_read,
2908    .write = unassigned_mem_write,
2909    .endianness = DEVICE_NATIVE_ENDIAN,
2910};
2911
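/* Write handler installed for RAM pages that still contain translated code:
   invalidate the affected TBs, perform the store, update the dirty flags
   and, once the page is fully dirty, switch it back to the fast write
   path.  */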
2912static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2913                               uint64_t val, unsigned size)
2914{
2915    int dirty_flags;
2916    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2917    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2918#if !defined(CONFIG_USER_ONLY)
2919        tb_invalidate_phys_page_fast(ram_addr, size);
2920        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2921#endif
2922    }
2923    switch (size) {
2924    case 1:
2925        stb_p(qemu_get_ram_ptr(ram_addr), val);
2926        break;
2927    case 2:
2928        stw_p(qemu_get_ram_ptr(ram_addr), val);
2929        break;
2930    case 4:
2931        stl_p(qemu_get_ram_ptr(ram_addr), val);
2932        break;
2933    default:
2934        abort();
2935    }
2936    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2937    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2938    /* we remove the notdirty callback only if the code has been
2939       flushed */
2940    if (dirty_flags == 0xff)
2941        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2942}
2943
2944static const MemoryRegionOps notdirty_mem_ops = {
2945    .read = error_mem_read,
2946    .write = notdirty_mem_write,
2947    .endianness = DEVICE_NATIVE_ENDIAN,
2948};
2949
2950/* Generate a debug exception if a watchpoint has been hit.  */
2951static void check_watchpoint(int offset, int len_mask, int flags)
2952{
2953    CPUArchState *env = cpu_single_env;
2954    target_ulong pc, cs_base;
2955    TranslationBlock *tb;
2956    target_ulong vaddr;
2957    CPUWatchpoint *wp;
2958    int cpu_flags;
2959
2960    if (env->watchpoint_hit) {
2961        /* We re-entered the check after replacing the TB. Now raise
2962         * the debug interrupt so that it will trigger after the
2963         * current instruction. */
2964        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2965        return;
2966    }
2967    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2968    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2969        if ((vaddr == (wp->vaddr & len_mask) ||
2970             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2971            wp->flags |= BP_WATCHPOINT_HIT;
2972            if (!env->watchpoint_hit) {
2973                env->watchpoint_hit = wp;
2974                tb = tb_find_pc(env->mem_io_pc);
2975                if (!tb) {
2976                    cpu_abort(env, "check_watchpoint: could not find TB for "
2977                              "pc=%p", (void *)env->mem_io_pc);
2978                }
2979                cpu_restore_state(tb, env, env->mem_io_pc);
2980                tb_phys_invalidate(tb, -1);
2981                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2982                    env->exception_index = EXCP_DEBUG;
2983                    cpu_loop_exit(env);
2984                } else {
2985                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2986                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2987                    cpu_resume_from_signal(env, NULL);
2988                }
2989            }
2990        } else {
2991            wp->flags &= ~BP_WATCHPOINT_HIT;
2992        }
2993    }
2994}
2995
2996/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2997   so these check for a hit then pass through to the normal out-of-line
2998   phys routines.  */
2999static uint64_t watch_mem_read(void *opaque, hwaddr addr,
3000                               unsigned size)
3001{
3002    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3003    switch (size) {
3004    case 1: return ldub_phys(addr);
3005    case 2: return lduw_phys(addr);
3006    case 4: return ldl_phys(addr);
3007    default: abort();
3008    }
3009}
3010
3011static void watch_mem_write(void *opaque, hwaddr addr,
3012                            uint64_t val, unsigned size)
3013{
3014    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3015    switch (size) {
3016    case 1:
3017        stb_phys(addr, val);
3018        break;
3019    case 2:
3020        stw_phys(addr, val);
3021        break;
3022    case 4:
3023        stl_phys(addr, val);
3024        break;
3025    default: abort();
3026    }
3027}
3028
3029static const MemoryRegionOps watch_mem_ops = {
3030    .read = watch_mem_read,
3031    .write = watch_mem_write,
3032    .endianness = DEVICE_NATIVE_ENDIAN,
3033};
3034
3035static uint64_t subpage_read(void *opaque, hwaddr addr,
3036                             unsigned len)
3037{
3038    subpage_t *mmio = opaque;
3039    unsigned int idx = SUBPAGE_IDX(addr);
3040    MemoryRegionSection *section;
3041#if defined(DEBUG_SUBPAGE)
3042    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3043           mmio, len, addr, idx);
3044#endif
3045
3046    section = &phys_sections[mmio->sub_section[idx]];
3047    addr += mmio->base;
3048    addr -= section->offset_within_address_space;
3049    addr += section->offset_within_region;
3050    return io_mem_read(section->mr, addr, len);
3051}
3052
3053static void subpage_write(void *opaque, hwaddr addr,
3054                          uint64_t value, unsigned len)
3055{
3056    subpage_t *mmio = opaque;
3057    unsigned int idx = SUBPAGE_IDX(addr);
3058    MemoryRegionSection *section;
3059#if defined(DEBUG_SUBPAGE)
3060    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3061           " idx %d value %"PRIx64"\n",
3062           __func__, mmio, len, addr, idx, value);
3063#endif
3064
3065    section = &phys_sections[mmio->sub_section[idx]];
3066    addr += mmio->base;
3067    addr -= section->offset_within_address_space;
3068    addr += section->offset_within_region;
3069    io_mem_write(section->mr, addr, value, len);
3070}
3071
3072static const MemoryRegionOps subpage_ops = {
3073    .read = subpage_read,
3074    .write = subpage_write,
3075    .endianness = DEVICE_NATIVE_ENDIAN,
3076};
3077
3078static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3079                                 unsigned size)
3080{
3081    ram_addr_t raddr = addr;
3082    void *ptr = qemu_get_ram_ptr(raddr);
3083    switch (size) {
3084    case 1: return ldub_p(ptr);
3085    case 2: return lduw_p(ptr);
3086    case 4: return ldl_p(ptr);
3087    default: abort();
3088    }
3089}
3090
3091static void subpage_ram_write(void *opaque, hwaddr addr,
3092                              uint64_t value, unsigned size)
3093{
3094    ram_addr_t raddr = addr;
3095    void *ptr = qemu_get_ram_ptr(raddr);
3096    switch (size) {
3097    case 1: return stb_p(ptr, value);
3098    case 2: return stw_p(ptr, value);
3099    case 4: return stl_p(ptr, value);
3100    default: abort();
3101    }
3102}
3103
3104static const MemoryRegionOps subpage_ram_ops = {
3105    .read = subpage_ram_read,
3106    .write = subpage_ram_write,
3107    .endianness = DEVICE_NATIVE_ENDIAN,
3108};
3109
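/* Associate the byte range [start, end] of a subpage with 'section'.  RAM
   sections are rewrapped with subpage_ram_ops so that accesses still go
   through a read/write callback.  */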
3110static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3111                             uint16_t section)
3112{
3113    int idx, eidx;
3114
3115    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3116        return -1;
3117    idx = SUBPAGE_IDX(start);
3118    eidx = SUBPAGE_IDX(end);
3119#if defined(DEBUG_SUBPAGE)
3120    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3121           mmio, start, end, idx, eidx, memory);
3122#endif
3123    if (memory_region_is_ram(phys_sections[section].mr)) {
3124        MemoryRegionSection new_section = phys_sections[section];
3125        new_section.mr = &io_mem_subpage_ram;
3126        section = phys_section_add(&new_section);
3127    }
3128    for (; idx <= eidx; idx++) {
3129        mmio->sub_section[idx] = section;
3130    }
3131
3132    return 0;
3133}
3134
3135static subpage_t *subpage_init(hwaddr base)
3136{
3137    subpage_t *mmio;
3138
3139    mmio = g_malloc0(sizeof(subpage_t));
3140
3141    mmio->base = base;
3142    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3143                          "subpage", TARGET_PAGE_SIZE);
3144    mmio->iomem.subpage = true;
3145#if defined(DEBUG_SUBPAGE)
3146    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3147           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3148#endif
3149    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3150
3151    return mmio;
3152}
3153
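/* Register a phys section that covers the entire address space with the
   given memory region; used for the fixed io_mem_* regions.  */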
3154static uint16_t dummy_section(MemoryRegion *mr)
3155{
3156    MemoryRegionSection section = {
3157        .mr = mr,
3158        .offset_within_address_space = 0,
3159        .offset_within_region = 0,
3160        .size = UINT64_MAX,
3161    };
3162
3163    return phys_section_add(&section);
3164}
3165
3166MemoryRegion *iotlb_to_region(hwaddr index)
3167{
3168    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3169}
3170
3171static void io_mem_init(void)
3172{
3173    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3174    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3175    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3176                          "unassigned", UINT64_MAX);
3177    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3178                          "notdirty", UINT64_MAX);
3179    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3180                          "subpage-ram", UINT64_MAX);
3181    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3182                          "watch", UINT64_MAX);
3183}
3184
3185static void mem_begin(MemoryListener *listener)
3186{
3187    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3188
3189    destroy_all_mappings(d);
3190    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3191}
3192
3193static void core_begin(MemoryListener *listener)
3194{
3195    phys_sections_clear();
3196    phys_section_unassigned = dummy_section(&io_mem_unassigned);
3197    phys_section_notdirty = dummy_section(&io_mem_notdirty);
3198    phys_section_rom = dummy_section(&io_mem_rom);
3199    phys_section_watch = dummy_section(&io_mem_watch);
3200}
3201
3202static void tcg_commit(MemoryListener *listener)
3203{
3204    CPUArchState *env;
3205
3206    /* since each CPU stores ram addresses in its TLB cache, we must
3207       reset the modified entries */
3208    /* XXX: slow ! */
3209    for (env = first_cpu; env != NULL; env = env->next_cpu) {
3210        tlb_flush(env, 1);
3211    }
3212}
3213
3214static void core_log_global_start(MemoryListener *listener)
3215{
3216    cpu_physical_memory_set_dirty_tracking(1);
3217}
3218
3219static void core_log_global_stop(MemoryListener *listener)
3220{
3221    cpu_physical_memory_set_dirty_tracking(0);
3222}
3223
3224static void io_region_add(MemoryListener *listener,
3225                          MemoryRegionSection *section)
3226{
3227    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3228
3229    mrio->mr = section->mr;
3230    mrio->offset = section->offset_within_region;
3231    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3232                 section->offset_within_address_space, section->size);
3233    ioport_register(&mrio->iorange);
3234}
3235
3236static void io_region_del(MemoryListener *listener,
3237                          MemoryRegionSection *section)
3238{
3239    isa_unassign_ioport(section->offset_within_address_space, section->size);
3240}
3241
3242static MemoryListener core_memory_listener = {
3243    .begin = core_begin,
3244    .log_global_start = core_log_global_start,
3245    .log_global_stop = core_log_global_stop,
3246    .priority = 1,
3247};
3248
3249static MemoryListener io_memory_listener = {
3250    .region_add = io_region_add,
3251    .region_del = io_region_del,
3252    .priority = 0,
3253};
3254
3255static MemoryListener tcg_memory_listener = {
3256    .commit = tcg_commit,
3257};
3258
3259void address_space_init_dispatch(AddressSpace *as)
3260{
3261    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3262
3263    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3264    d->listener = (MemoryListener) {
3265        .begin = mem_begin,
3266        .region_add = mem_add,
3267        .region_nop = mem_add,
3268        .priority = 0,
3269    };
3270    as->dispatch = d;
3271    memory_listener_register(&d->listener, as);
3272}
3273
3274void address_space_destroy_dispatch(AddressSpace *as)
3275{
3276    AddressSpaceDispatch *d = as->dispatch;
3277
3278    memory_listener_unregister(&d->listener);
3279    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3280    g_free(d);
3281    as->dispatch = NULL;
3282}
3283
3284static void memory_map_init(void)
3285{
3286    system_memory = g_malloc(sizeof(*system_memory));
3287    memory_region_init(system_memory, "system", INT64_MAX);
3288    address_space_init(&address_space_memory, system_memory);
3289    address_space_memory.name = "memory";
3290
3291    system_io = g_malloc(sizeof(*system_io));
3292    memory_region_init(system_io, "io", 65536);
3293    address_space_init(&address_space_io, system_io);
3294    address_space_io.name = "I/O";
3295
3296    memory_listener_register(&core_memory_listener, &address_space_memory);
3297    memory_listener_register(&io_memory_listener, &address_space_io);
3298    memory_listener_register(&tcg_memory_listener, &address_space_memory);
3299
3300    dma_context_init(&dma_context_memory, &address_space_memory,
3301                     NULL, NULL, NULL);
3302}
3303
3304MemoryRegion *get_system_memory(void)
3305{
3306    return system_memory;
3307}
3308
3309MemoryRegion *get_system_io(void)
3310{
3311    return system_io;
3312}
3313
3314#endif /* !defined(CONFIG_USER_ONLY) */
3315
3316/* physical memory access (slow version, mainly for debug) */
3317#if defined(CONFIG_USER_ONLY)
3318int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3319                        uint8_t *buf, int len, int is_write)
3320{
3321    int l, flags;
3322    target_ulong page;
3323    void * p;
3324
3325    while (len > 0) {
3326        page = addr & TARGET_PAGE_MASK;
3327        l = (page + TARGET_PAGE_SIZE) - addr;
3328        if (l > len)
3329            l = len;
3330        flags = page_get_flags(page);
3331        if (!(flags & PAGE_VALID))
3332            return -1;
3333        if (is_write) {
3334            if (!(flags & PAGE_WRITE))
3335                return -1;
3336            /* XXX: this code should not depend on lock_user */
3337            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3338                return -1;
3339            memcpy(p, buf, l);
3340            unlock_user(p, addr, l);
3341        } else {
3342            if (!(flags & PAGE_READ))
3343                return -1;
3344            /* XXX: this code should not depend on lock_user */
3345            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3346                return -1;
3347            memcpy(buf, p, l);
3348            unlock_user(p, addr, 0);
3349        }
3350        len -= l;
3351        buf += l;
3352        addr += l;
3353    }
3354    return 0;
3355}
3356
3357#else
3358
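/* After writing to RAM through a host pointer, invalidate any translated
   code on the page, mark it dirty, and notify Xen of the modification.  */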
3359static void invalidate_and_set_dirty(hwaddr addr,
3360                                     hwaddr length)
3361{
3362    if (!cpu_physical_memory_is_dirty(addr)) {
3363        /* invalidate code */
3364        tb_invalidate_phys_page_range(addr, addr + length, 0);
3365        /* set dirty bit */
3366        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3367    }
3368    xen_modified_memory(addr, length);
3369}
3370
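/* Copy 'len' bytes between 'buf' and guest memory in address space 'as',
   going through the MMIO callbacks wherever the target is not plain RAM.
   Illustrative call (address and buffer are hypothetical):

       uint8_t b[4];
       address_space_rw(&address_space_memory, 0x1000, b, sizeof(b), false);
 */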
3371void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3372                      int len, bool is_write)
3373{
3374    AddressSpaceDispatch *d = as->dispatch;
3375    int l;
3376    uint8_t *ptr;
3377    uint32_t val;
3378    hwaddr page;
3379    MemoryRegionSection *section;
3380
3381    while (len > 0) {
3382        page = addr & TARGET_PAGE_MASK;
3383        l = (page + TARGET_PAGE_SIZE) - addr;
3384        if (l > len)
3385            l = len;
3386        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3387
3388        if (is_write) {
3389            if (!memory_region_is_ram(section->mr)) {
3390                hwaddr addr1;
3391                addr1 = memory_region_section_addr(section, addr);
3392                /* XXX: could force cpu_single_env to NULL to avoid
3393                   potential bugs */
3394                if (l >= 4 && ((addr1 & 3) == 0)) {
3395                    /* 32 bit write access */
3396                    val = ldl_p(buf);
3397                    io_mem_write(section->mr, addr1, val, 4);
3398                    l = 4;
3399                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3400                    /* 16 bit write access */
3401                    val = lduw_p(buf);
3402                    io_mem_write(section->mr, addr1, val, 2);
3403                    l = 2;
3404                } else {
3405                    /* 8 bit write access */
3406                    val = ldub_p(buf);
3407                    io_mem_write(section->mr, addr1, val, 1);
3408                    l = 1;
3409                }
3410            } else if (!section->readonly) {
3411                ram_addr_t addr1;
3412                addr1 = memory_region_get_ram_addr(section->mr)
3413                    + memory_region_section_addr(section, addr);
3414                /* RAM case */
3415                ptr = qemu_get_ram_ptr(addr1);
3416                memcpy(ptr, buf, l);
3417                invalidate_and_set_dirty(addr1, l);
3418                qemu_put_ram_ptr(ptr);
3419            }
3420        } else {
3421            if (!(memory_region_is_ram(section->mr) ||
3422                  memory_region_is_romd(section->mr))) {
3423                hwaddr addr1;
3424                /* I/O case */
3425                addr1 = memory_region_section_addr(section, addr);
3426                if (l >= 4 && ((addr1 & 3) == 0)) {
3427                    /* 32 bit read access */
3428                    val = io_mem_read(section->mr, addr1, 4);
3429                    stl_p(buf, val);
3430                    l = 4;
3431                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3432                    /* 16 bit read access */
3433                    val = io_mem_read(section->mr, addr1, 2);
3434                    stw_p(buf, val);
3435                    l = 2;
3436                } else {
3437                    /* 8 bit read access */
3438                    val = io_mem_read(section->mr, addr1, 1);
3439                    stb_p(buf, val);
3440                    l = 1;
3441                }
3442            } else {
3443                /* RAM case */
3444                ptr = qemu_get_ram_ptr(section->mr->ram_addr
3445                                       + memory_region_section_addr(section,
3446                                                                    addr));
3447                memcpy(buf, ptr, l);
3448                qemu_put_ram_ptr(ptr);
3449            }
3450        }
3451        len -= l;
3452        buf += l;
3453        addr += l;
3454    }
3455}
3456
3457void address_space_write(AddressSpace *as, hwaddr addr,
3458                         const uint8_t *buf, int len)
3459{
3460    address_space_rw(as, addr, (uint8_t *)buf, len, true);
3461}
3462
3463/**
3464 * address_space_read: read from an address space.
3465 *
3466 * @as: #AddressSpace to be accessed
3467 * @addr: address within that address space
3468 * @buf: buffer into which the data is read (at least @len bytes)
3469 */
3470void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3471{
3472    address_space_rw(as, addr, buf, len, false);
3473}
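
/* Illustrative sketch (hypothetical caller, hypothetical address 'gpa'):
 * moving a few bytes of guest-physical memory through the helpers above,
 * using the global address_space_memory.
 *
 *     uint8_t buf[4];
 *     address_space_read(&address_space_memory, gpa, buf, sizeof(buf));
 *     buf[0] |= 0x1;
 *     address_space_write(&address_space_memory, gpa, buf, sizeof(buf));
 */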
3474
3475
3476void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3477                            int len, int is_write)
3478{
3479    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3480}
3481
3482/* used for ROM loading: can write in RAM and ROM */
3483void cpu_physical_memory_write_rom(hwaddr addr,
3484                                   const uint8_t *buf, int len)
3485{
3486    AddressSpaceDispatch *d = address_space_memory.dispatch;
3487    int l;
3488    uint8_t *ptr;
3489    hwaddr page;
3490    MemoryRegionSection *section;
3491
3492    while (len > 0) {
3493        page = addr & TARGET_PAGE_MASK;
3494        l = (page + TARGET_PAGE_SIZE) - addr;
3495        if (l > len)
3496            l = len;
3497        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3498
3499        if (!(memory_region_is_ram(section->mr) ||
3500              memory_region_is_romd(section->mr))) {
3501            /* do nothing */
3502        } else {
3503            unsigned long addr1;
3504            addr1 = memory_region_get_ram_addr(section->mr)
3505                + memory_region_section_addr(section, addr);
3506            /* ROM/RAM case */
3507            ptr = qemu_get_ram_ptr(addr1);
3508            memcpy(ptr, buf, l);
3509            invalidate_and_set_dirty(addr1, l);
3510            qemu_put_ram_ptr(ptr);
3511        }
3512        len -= l;
3513        buf += l;
3514        addr += l;
3515    }
3516}
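
/* Illustrative sketch (hypothetical 'bootrom_base', 'blob', 'blob_size'):
 * a firmware loader copying a boot image into a region that may be ROM;
 * a plain cpu_physical_memory_rw() write would be skipped for read-only
 * sections, so ROM loading goes through this helper instead.
 *
 *     cpu_physical_memory_write_rom(bootrom_base, blob, blob_size);
 */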
3517
3518typedef struct {
3519    void *buffer;
3520    hwaddr addr;
3521    hwaddr len;
3522} BounceBuffer;
3523
3524static BounceBuffer bounce;
3525
3526typedef struct MapClient {
3527    void *opaque;
3528    void (*callback)(void *opaque);
3529    QLIST_ENTRY(MapClient) link;
3530} MapClient;
3531
3532static QLIST_HEAD(map_client_list, MapClient) map_client_list
3533    = QLIST_HEAD_INITIALIZER(map_client_list);
3534
3535void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3536{
3537    MapClient *client = g_malloc(sizeof(*client));
3538
3539    client->opaque = opaque;
3540    client->callback = callback;
3541    QLIST_INSERT_HEAD(&map_client_list, client, link);
3542    return client;
3543}
3544
3545static void cpu_unregister_map_client(void *_client)
3546{
3547    MapClient *client = (MapClient *)_client;
3548
3549    QLIST_REMOVE(client, link);
3550    g_free(client);
3551}
3552
3553static void cpu_notify_map_clients(void)
3554{
3555    MapClient *client;
3556
3557    while (!QLIST_EMPTY(&map_client_list)) {
3558        client = QLIST_FIRST(&map_client_list);
3559        client->callback(client->opaque);
3560        cpu_unregister_map_client(client);
3561    }
3562}
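
/* Illustrative sketch (made-up names MyDeviceState, my_dma_retry,
 * my_device_start_dma): a device whose map attempt failed can register a
 * map client; the callback runs from cpu_notify_map_clients() once the
 * bounce buffer is released, and the client is unregistered automatically.
 *
 *     static void my_dma_retry(void *opaque)
 *     {
 *         MyDeviceState *s = opaque;
 *         my_device_start_dma(s);            // retry the deferred transfer
 *     }
 *     ...
 *     if (ptr == NULL) {
 *         cpu_register_map_client(s, my_dma_retry);
 *         return;
 *     }
 */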
3563
3564/* Map a physical memory region into a host virtual address.
3565 * May map a subset of the requested range, given by and returned in *plen.
3566 * May return NULL if resources needed to perform the mapping are exhausted.
3567 * Use only for reads OR writes - not for read-modify-write operations.
3568 * Use cpu_register_map_client() to know when retrying the map operation is
3569 * likely to succeed.
3570 */
3571void *address_space_map(AddressSpace *as,
3572                        hwaddr addr,
3573                        hwaddr *plen,
3574                        bool is_write)
3575{
3576    AddressSpaceDispatch *d = as->dispatch;
3577    hwaddr len = *plen;
3578    hwaddr todo = 0;
3579    int l;
3580    hwaddr page;
3581    MemoryRegionSection *section;
3582    ram_addr_t raddr = RAM_ADDR_MAX;
3583    ram_addr_t rlen;
3584    void *ret;
3585
3586    while (len > 0) {
3587        page = addr & TARGET_PAGE_MASK;
3588        l = (page + TARGET_PAGE_SIZE) - addr;
3589        if (l > len)
3590            l = len;
3591        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3592
3593        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3594            if (todo || bounce.buffer) {
3595                break;
3596            }
3597            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3598            bounce.addr = addr;
3599            bounce.len = l;
3600            if (!is_write) {
3601                address_space_read(as, addr, bounce.buffer, l);
3602            }
3603
3604            *plen = l;
3605            return bounce.buffer;
3606        }
3607        if (!todo) {
3608            raddr = memory_region_get_ram_addr(section->mr)
3609                + memory_region_section_addr(section, addr);
3610        }
3611
3612        len -= l;
3613        addr += l;
3614        todo += l;
3615    }
3616    rlen = todo;
3617    ret = qemu_ram_ptr_length(raddr, &rlen);
3618    *plen = rlen;
3619    return ret;
3620}
3621
3622/* Unmaps a memory region previously mapped by address_space_map().
3623 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3624 * the amount of memory that was actually read or written by the caller.
3625 */
3626void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3627                         int is_write, hwaddr access_len)
3628{
3629    if (buffer != bounce.buffer) {
3630        if (is_write) {
3631            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3632            while (access_len) {
3633                unsigned l;
3634                l = TARGET_PAGE_SIZE;
3635                if (l > access_len)
3636                    l = access_len;
3637                invalidate_and_set_dirty(addr1, l);
3638                addr1 += l;
3639                access_len -= l;
3640            }
3641        }
3642        if (xen_enabled()) {
3643            xen_invalidate_map_cache_entry(buffer);
3644        }
3645        return;
3646    }
3647    if (is_write) {
3648        address_space_write(as, bounce.addr, bounce.buffer, access_len);
3649    }
3650    qemu_vfree(bounce.buffer);
3651    bounce.buffer = NULL;
3652    cpu_notify_map_clients();
3653}
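
/* Illustrative sketch of a full map/access/unmap cycle (hypothetical
 * 'desc_addr' and 'size'): map for writing, touch at most the returned
 * length, then unmap with the number of bytes actually written so the
 * dirty marking and TB invalidation above are applied.
 *
 *     hwaddr plen = size;
 *     void *p = address_space_map(&address_space_memory, desc_addr,
 *                                 &plen, true);
 *     if (!p) {
 *         return;   // exhausted; see cpu_register_map_client() above
 *     }
 *     // ... produce at most plen bytes at p ...
 *     address_space_unmap(&address_space_memory, p, plen, true, plen);
 */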
3654
3655void *cpu_physical_memory_map(hwaddr addr,
3656                              hwaddr *plen,
3657                              int is_write)
3658{
3659    return address_space_map(&address_space_memory, addr, plen, is_write);
3660}
3661
3662void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3663                               int is_write, hwaddr access_len)
3664{
3665    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3666}
3667
3668/* warning: addr must be aligned */
3669static inline uint32_t ldl_phys_internal(hwaddr addr,
3670                                         enum device_endian endian)
3671{
3672    uint8_t *ptr;
3673    uint32_t val;
3674    MemoryRegionSection *section;
3675
3676    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3677
3678    if (!(memory_region_is_ram(section->mr) ||
3679          memory_region_is_romd(section->mr))) {
3680        /* I/O case */
3681        addr = memory_region_section_addr(section, addr);
3682        val = io_mem_read(section->mr, addr, 4);
3683#if defined(TARGET_WORDS_BIGENDIAN)
3684        if (endian == DEVICE_LITTLE_ENDIAN) {
3685            val = bswap32(val);
3686        }
3687#else
3688        if (endian == DEVICE_BIG_ENDIAN) {
3689            val = bswap32(val);
3690        }
3691#endif
3692    } else {
3693        /* RAM case */
3694        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3695                                & TARGET_PAGE_MASK)
3696                               + memory_region_section_addr(section, addr));
3697        switch (endian) {
3698        case DEVICE_LITTLE_ENDIAN:
3699            val = ldl_le_p(ptr);
3700            break;
3701        case DEVICE_BIG_ENDIAN:
3702            val = ldl_be_p(ptr);
3703            break;
3704        default:
3705            val = ldl_p(ptr);
3706            break;
3707        }
3708    }
3709    return val;
3710}
3711
3712uint32_t ldl_phys(hwaddr addr)
3713{
3714    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3715}
3716
3717uint32_t ldl_le_phys(hwaddr addr)
3718{
3719    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3720}
3721
3722uint32_t ldl_be_phys(hwaddr addr)
3723{
3724    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3725}
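
/* Illustrative sketch (hypothetical 'regs_base' layout): the fixed-endian
 * variants read device data in a known byte order regardless of the
 * target's native endianness.
 *
 *     uint32_t id  = ldl_le_phys(regs_base + 0x00);  // little-endian field
 *     uint32_t cfg = ldl_be_phys(regs_base + 0x04);  // big-endian field
 */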
3726
3727/* warning: addr must be aligned */
3728static inline uint64_t ldq_phys_internal(hwaddr addr,
3729                                         enum device_endian endian)
3730{
3731    uint8_t *ptr;
3732    uint64_t val;
3733    MemoryRegionSection *section;
3734
3735    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3736
3737    if (!(memory_region_is_ram(section->mr) ||
3738          memory_region_is_romd(section->mr))) {
3739        /* I/O case */
3740        addr = memory_region_section_addr(section, addr);
3741
3742        /* XXX: this is broken when the device endianness differs from the
3743           CPU endianness.  Fix it and honour the "endian" argument. */
3744#ifdef TARGET_WORDS_BIGENDIAN
3745        val = io_mem_read(section->mr, addr, 4) << 32;
3746        val |= io_mem_read(section->mr, addr + 4, 4);
3747#else
3748        val = io_mem_read(section->mr, addr, 4);
3749        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3750#endif
3751    } else {
3752        /* RAM case */
3753        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3754                                & TARGET_PAGE_MASK)
3755                               + memory_region_section_addr(section, addr));
3756        switch (endian) {
3757        case DEVICE_LITTLE_ENDIAN:
3758            val = ldq_le_p(ptr);
3759            break;
3760        case DEVICE_BIG_ENDIAN:
3761            val = ldq_be_p(ptr);
3762            break;
3763        default:
3764            val = ldq_p(ptr);
3765            break;
3766        }
3767    }
3768    return val;
3769}
3770
3771uint64_t ldq_phys(hwaddr addr)
3772{
3773    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3774}
3775
3776uint64_t ldq_le_phys(hwaddr addr)
3777{
3778    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3779}
3780
3781uint64_t ldq_be_phys(hwaddr addr)
3782{
3783    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3784}
3785
3786/* XXX: optimize */
3787uint32_t ldub_phys(hwaddr addr)
3788{
3789    uint8_t val;
3790    cpu_physical_memory_read(addr, &val, 1);
3791    return val;
3792}
3793
3794/* warning: addr must be aligned */
3795static inline uint32_t lduw_phys_internal(hwaddr addr,
3796                                          enum device_endian endian)
3797{
3798    uint8_t *ptr;
3799    uint32_t val;
3800    MemoryRegionSection *section;
3801
3802    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3803
3804    if (!(memory_region_is_ram(section->mr) ||
3805          memory_region_is_romd(section->mr))) {
3806        /* I/O case */
3807        addr = memory_region_section_addr(section, addr);
3808        val = io_mem_read(section->mr, addr, 2);
3809#if defined(TARGET_WORDS_BIGENDIAN)
3810        if (endian == DEVICE_LITTLE_ENDIAN) {
3811            val = bswap16(val);
3812        }
3813#else
3814        if (endian == DEVICE_BIG_ENDIAN) {
3815            val = bswap16(val);
3816        }
3817#endif
3818    } else {
3819        /* RAM case */
3820        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3821                                & TARGET_PAGE_MASK)
3822                               + memory_region_section_addr(section, addr));
3823        switch (endian) {
3824        case DEVICE_LITTLE_ENDIAN:
3825            val = lduw_le_p(ptr);
3826            break;
3827        case DEVICE_BIG_ENDIAN:
3828            val = lduw_be_p(ptr);
3829            break;
3830        default:
3831            val = lduw_p(ptr);
3832            break;
3833        }
3834    }
3835    return val;
3836}
3837
3838uint32_t lduw_phys(hwaddr addr)
3839{
3840    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3841}
3842
3843uint32_t lduw_le_phys(hwaddr addr)
3844{
3845    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3846}
3847
3848uint32_t lduw_be_phys(hwaddr addr)
3849{
3850    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3851}
3852
3853/* warning: addr must be aligned. The RAM page is not marked dirty
3854   and the code inside it is not invalidated. This is useful when the
3855   dirty bits are used to track modified PTEs. */
3856void stl_phys_notdirty(hwaddr addr, uint32_t val)
3857{
3858    uint8_t *ptr;
3859    MemoryRegionSection *section;
3860
3861    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3862
3863    if (!memory_region_is_ram(section->mr) || section->readonly) {
3864        addr = memory_region_section_addr(section, addr);
3865        if (memory_region_is_ram(section->mr)) {
3866            section = &phys_sections[phys_section_rom];
3867        }
3868        io_mem_write(section->mr, addr, val, 4);
3869    } else {
3870        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3871                               & TARGET_PAGE_MASK)
3872            + memory_region_section_addr(section, addr);
3873        ptr = qemu_get_ram_ptr(addr1);
3874        stl_p(ptr, val);
3875
3876        if (unlikely(in_migration)) {
3877            if (!cpu_physical_memory_is_dirty(addr1)) {
3878                /* invalidate code */
3879                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3880                /* set dirty bit */
3881                cpu_physical_memory_set_dirty_flags(
3882                    addr1, (0xff & ~CODE_DIRTY_FLAG));
3883            }
3884        }
3885    }
3886}
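
/* Illustrative sketch (hypothetical 'pte_addr' and PTE_ACCESSED flag):
 * a target MMU helper updating a bookkeeping bit in a guest page table
 * entry.  Using stl_phys_notdirty() keeps this store from marking the page
 * dirty or invalidating translated code there (migration is special-cased
 * above), which matters when the dirty bits are themselves used to detect
 * guest-modified PTEs.
 *
 *     uint32_t pte = ldl_phys(pte_addr);
 *     if (!(pte & PTE_ACCESSED)) {
 *         stl_phys_notdirty(pte_addr, pte | PTE_ACCESSED);
 *     }
 */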
3887
3888void stq_phys_notdirty(hwaddr addr, uint64_t val)
3889{
3890    uint8_t *ptr;
3891    MemoryRegionSection *section;
3892
3893    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3894
3895    if (!memory_region_is_ram(section->mr) || section->readonly) {
3896        addr = memory_region_section_addr(section, addr);
3897        if (memory_region_is_ram(section->mr)) {
3898            section = &phys_sections[phys_section_rom];
3899        }
3900#ifdef TARGET_WORDS_BIGENDIAN
3901        io_mem_write(section->mr, addr, val >> 32, 4);
3902        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3903#else
3904        io_mem_write(section->mr, addr, (uint32_t)val, 4);
3905        io_mem_write(section->mr, addr + 4, val >> 32, 4);
3906#endif
3907    } else {
3908        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3909                                & TARGET_PAGE_MASK)
3910                               + memory_region_section_addr(section, addr));
3911        stq_p(ptr, val);
3912    }
3913}
3914
3915/* warning: addr must be aligned */
3916static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3917                                     enum device_endian endian)
3918{
3919    uint8_t *ptr;
3920    MemoryRegionSection *section;
3921
3922    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3923
3924    if (!memory_region_is_ram(section->mr) || section->readonly) {
3925        addr = memory_region_section_addr(section, addr);
3926        if (memory_region_is_ram(section->mr)) {
3927            section = &phys_sections[phys_section_rom];
3928        }
3929#if defined(TARGET_WORDS_BIGENDIAN)
3930        if (endian == DEVICE_LITTLE_ENDIAN) {
3931            val = bswap32(val);
3932        }
3933#else
3934        if (endian == DEVICE_BIG_ENDIAN) {
3935            val = bswap32(val);
3936        }
3937#endif
3938        io_mem_write(section->mr, addr, val, 4);
3939    } else {
3940        unsigned long addr1;
3941        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3942            + memory_region_section_addr(section, addr);
3943        /* RAM case */
3944        ptr = qemu_get_ram_ptr(addr1);
3945        switch (endian) {
3946        case DEVICE_LITTLE_ENDIAN:
3947            stl_le_p(ptr, val);
3948            break;
3949        case DEVICE_BIG_ENDIAN:
3950            stl_be_p(ptr, val);
3951            break;
3952        default:
3953            stl_p(ptr, val);
3954            break;
3955        }
3956        invalidate_and_set_dirty(addr1, 4);
3957    }
3958}
3959
3960void stl_phys(hwaddr addr, uint32_t val)
3961{
3962    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3963}
3964
3965void stl_le_phys(hwaddr addr, uint32_t val)
3966{
3967    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3968}
3969
3970void stl_be_phys(hwaddr addr, uint32_t val)
3971{
3972    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3973}
3974
3975/* XXX: optimize */
3976void stb_phys(hwaddr addr, uint32_t val)
3977{
3978    uint8_t v = val;
3979    cpu_physical_memory_write(addr, &v, 1);
3980}
3981
3982/* warning: addr must be aligned */
3983static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3984                                     enum device_endian endian)
3985{
3986    uint8_t *ptr;
3987    MemoryRegionSection *section;
3988
3989    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3990
3991    if (!memory_region_is_ram(section->mr) || section->readonly) {
3992        addr = memory_region_section_addr(section, addr);
3993        if (memory_region_is_ram(section->mr)) {
3994            section = &phys_sections[phys_section_rom];
3995        }
3996#if defined(TARGET_WORDS_BIGENDIAN)
3997        if (endian == DEVICE_LITTLE_ENDIAN) {
3998            val = bswap16(val);
3999        }
4000#else
4001        if (endian == DEVICE_BIG_ENDIAN) {
4002            val = bswap16(val);
4003        }
4004#endif
4005        io_mem_write(section->mr, addr, val, 2);
4006    } else {
4007        unsigned long addr1;
4008        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4009            + memory_region_section_addr(section, addr);
4010        /* RAM case */
4011        ptr = qemu_get_ram_ptr(addr1);
4012        switch (endian) {
4013        case DEVICE_LITTLE_ENDIAN:
4014            stw_le_p(ptr, val);
4015            break;
4016        case DEVICE_BIG_ENDIAN:
4017            stw_be_p(ptr, val);
4018            break;
4019        default:
4020            stw_p(ptr, val);
4021            break;
4022        }
4023        invalidate_and_set_dirty(addr1, 2);
4024    }
4025}
4026
4027void stw_phys(hwaddr addr, uint32_t val)
4028{
4029    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4030}
4031
4032void stw_le_phys(hwaddr addr, uint32_t val)
4033{
4034    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4035}
4036
4037void stw_be_phys(hwaddr addr, uint32_t val)
4038{
4039    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4040}
4041
4042/* XXX: optimize */
4043void stq_phys(hwaddr addr, uint64_t val)
4044{
4045    val = tswap64(val);
4046    cpu_physical_memory_write(addr, &val, 8);
4047}
4048
4049void stq_le_phys(hwaddr addr, uint64_t val)
4050{
4051    val = cpu_to_le64(val);
4052    cpu_physical_memory_write(addr, &val, 8);
4053}
4054
4055void stq_be_phys(hwaddr addr, uint64_t val)
4056{
4057    val = cpu_to_be64(val);
4058    cpu_physical_memory_write(addr, &val, 8);
4059}
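
/* Illustrative sketch (hypothetical 'desc_base', 'mailbox_base' and values):
 * writing fixed-endian fields into guest memory, e.g. a little-endian DMA
 * descriptor and a big-endian 64-bit mailbox.
 *
 *     stl_le_phys(desc_base + 0x0, buf_lo);
 *     stw_le_phys(desc_base + 0x8, flags);
 *     stq_be_phys(mailbox_base, cookie);
 */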
4060
4061/* virtual memory access for debug (includes writing to ROM) */
4062int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4063                        uint8_t *buf, int len, int is_write)
4064{
4065    int l;
4066    hwaddr phys_addr;
4067    target_ulong page;
4068
4069    while (len > 0) {
4070        page = addr & TARGET_PAGE_MASK;
4071        phys_addr = cpu_get_phys_page_debug(env, page);
4072        /* if no physical page mapped, return an error */
4073        if (phys_addr == -1)
4074            return -1;
4075        l = (page + TARGET_PAGE_SIZE) - addr;
4076        if (l > len)
4077            l = len;
4078        phys_addr += (addr & ~TARGET_PAGE_MASK);
4079        if (is_write)
4080            cpu_physical_memory_write_rom(phys_addr, buf, l);
4081        else
4082            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4083        len -= l;
4084        buf += l;
4085        addr += l;
4086    }
4087    return 0;
4088}
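
/* Illustrative sketch (hypothetical 'vaddr'): a gdbstub-style peek at guest
 * virtual memory; -1 means some page in the range has no valid mapping.
 *
 *     uint8_t buf[16];
 *     if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
 *         // unmapped virtual address
 *     }
 */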
4089#endif
4090
4091/* In deterministic execution mode, an instruction that performs device I/O
4092   must be the last one in its TB, so the TB is regenerated to end on it. */
4093void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4094{
4095    TranslationBlock *tb;
4096    uint32_t n, cflags;
4097    target_ulong pc, cs_base;
4098    uint64_t flags;
4099
4100    tb = tb_find_pc(retaddr);
4101    if (!tb) {
4102        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4103                  (void *)retaddr);
4104    }
4105    n = env->icount_decr.u16.low + tb->icount;
4106    cpu_restore_state(tb, env, retaddr);
4107    /* Calculate how many instructions had been executed before the fault
4108       occurred.  */
4109    n = n - env->icount_decr.u16.low;
4110    /* Generate a new TB ending on the I/O insn.  */
4111    n++;
4112    /* On MIPS and SH, delay slot instructions can only be restarted if
4113       they were already the first instruction in the TB.  If this is not
4114       the first instruction in a TB then re-execute the preceding
4115       branch.  */
4116#if defined(TARGET_MIPS)
4117    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4118        env->active_tc.PC -= 4;
4119        env->icount_decr.u16.low++;
4120        env->hflags &= ~MIPS_HFLAG_BMASK;
4121    }
4122#elif defined(TARGET_SH4)
4123    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4124            && n > 1) {
4125        env->pc -= 2;
4126        env->icount_decr.u16.low++;
4127        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4128    }
4129#endif
4130    /* This should never happen.  */
4131    if (n > CF_COUNT_MASK)
4132        cpu_abort(env, "TB too big during recompile");
4133
4134    cflags = n | CF_LAST_IO;
4135    pc = tb->pc;
4136    cs_base = tb->cs_base;
4137    flags = tb->flags;
4138    tb_phys_invalidate(tb, -1);
4139    /* FIXME: In theory this could raise an exception.  In practice
4140       we have already translated the block once so it's probably ok.  */
4141    tb_gen_code(env, pc, cs_base, flags, cflags);
4142    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4143       the first in the TB) then we end up generating a whole new TB and
4144       repeating the fault, which is horribly inefficient.
4145       Better would be to execute just this insn uncached, or generate a
4146       second new TB.  */
4147    cpu_resume_from_signal(env, NULL);
4148}
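
/* Worked example of the accounting above (illustrative numbers only):
 * say tb->icount == 10 and icount_decr.u16.low was left at 90 when the TB
 * was entered, so n = 90 + 10 = 100.  If cpu_restore_state() determines
 * that 3 instructions completed before the I/O instruction, it re-credits
 * the unexecuted ones, leaving u16.low == 97; n then becomes 100 - 97 = 3
 * and, after n++, the regenerated TB holds 4 instructions and ends on the
 * I/O instruction.
 */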
4149
4150#if !defined(CONFIG_USER_ONLY)
4151
4152void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4153{
4154    int i, target_code_size, max_target_code_size;
4155    int direct_jmp_count, direct_jmp2_count, cross_page;
4156    TranslationBlock *tb;
4157
4158    target_code_size = 0;
4159    max_target_code_size = 0;
4160    cross_page = 0;
4161    direct_jmp_count = 0;
4162    direct_jmp2_count = 0;
4163    for(i = 0; i < nb_tbs; i++) {
4164        tb = &tbs[i];
4165        target_code_size += tb->size;
4166        if (tb->size > max_target_code_size)
4167            max_target_code_size = tb->size;
4168        if (tb->page_addr[1] != -1)
4169            cross_page++;
4170        if (tb->tb_next_offset[0] != 0xffff) {
4171            direct_jmp_count++;
4172            if (tb->tb_next_offset[1] != 0xffff) {
4173                direct_jmp2_count++;
4174            }
4175        }
4176    }
4177    /* XXX: avoid using doubles? */
4178    cpu_fprintf(f, "Translation buffer state:\n");
4179    cpu_fprintf(f, "gen code size       %td/%zd\n",
4180                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4181    cpu_fprintf(f, "TB count            %d/%d\n", 
4182                nb_tbs, code_gen_max_blocks);
4183    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
4184                nb_tbs ? target_code_size / nb_tbs : 0,
4185                max_target_code_size);
4186    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
4187                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4188                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4189    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4190            cross_page,
4191            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4192    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
4193                direct_jmp_count,
4194                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4195                direct_jmp2_count,
4196                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4197    cpu_fprintf(f, "\nStatistics:\n");
4198    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
4199    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4200    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
4201    tcg_dump_info(f, cpu_fprintf);
4202}
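
/* Sketch of how this report is usually obtained: the monitor's "info jit"
 * command prints it, and for an ad-hoc dump plain stdio fprintf() is a
 * valid fprintf_function:
 *
 *     dump_exec_info(stderr, fprintf);
 */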
4203
4204/*
4205 * A helper function for the _utterly broken_ virtio device model to find out if
4206 * it's running on a big-endian machine. Don't do this at home, kids!
4207 */
4208bool virtio_is_big_endian(void);
4209bool virtio_is_big_endian(void)
4210{
4211#if defined(TARGET_WORDS_BIGENDIAN)
4212    return true;
4213#else
4214    return false;
4215#endif
4216}
4217
4218#endif
4219
4220#ifndef CONFIG_USER_ONLY
4221bool cpu_physical_memory_is_io(hwaddr phys_addr)
4222{
4223    MemoryRegionSection *section;
4224
4225    section = phys_page_find(address_space_memory.dispatch,
4226                             phys_addr >> TARGET_PAGE_BITS);
4227
4228    return !(memory_region_is_ram(section->mr) ||
4229             memory_region_is_romd(section->mr));
4230}
4231#endif
4232