qemu/exec.c
   1/*
   2 *  virtual page mapping and translated block handling
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifdef _WIN32
  21#include <windows.h>
  22#else
  23#include <sys/types.h>
  24#include <sys/mman.h>
  25#endif
  26
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "exec-all.h"
  30#include "tcg.h"
  31#include "hw/hw.h"
  32#include "hw/qdev.h"
  33#include "osdep.h"
  34#include "kvm.h"
  35#include "qemu-timer.h"
  36#if defined(CONFIG_USER_ONLY)
  37#include <qemu.h>
  38#include <signal.h>
  39#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  40#include <sys/param.h>
  41#if __FreeBSD_version >= 700104
  42#define HAVE_KINFO_GETVMMAP
  43#define sigqueue sigqueue_freebsd  /* avoid redefinition */
  44#include <sys/time.h>
  45#include <sys/proc.h>
  46#include <machine/profile.h>
  47#define _KERNEL
  48#include <sys/user.h>
  49#undef _KERNEL
  50#undef sigqueue
  51#include <libutil.h>
  52#endif
  53#endif
  54#endif
  55
  56//#define DEBUG_TB_INVALIDATE
  57//#define DEBUG_FLUSH
  58//#define DEBUG_TLB
  59//#define DEBUG_UNASSIGNED
  60
  61/* make various TB consistency checks */
  62//#define DEBUG_TB_CHECK
  63//#define DEBUG_TLB_CHECK
  64
  65//#define DEBUG_IOPORT
  66//#define DEBUG_SUBPAGE
  67
  68#if !defined(CONFIG_USER_ONLY)
  69/* TB consistency checks only implemented for usermode emulation.  */
  70#undef DEBUG_TB_CHECK
  71#endif
  72
  73#define SMC_BITMAP_USE_THRESHOLD 10
  74
  75static TranslationBlock *tbs;
  76static int code_gen_max_blocks;
  77TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
  78static int nb_tbs;
  79/* any access to the tbs or the page table must use this lock */
  80spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
  81
  82#if defined(__arm__) || defined(__sparc_v9__)
  83/* The prologue must be reachable with a direct jump. ARM and Sparc64
  84 have limited branch ranges (possibly also PPC), so place it in a
  85 section close to the code segment. */
  86#define code_gen_section                                \
  87    __attribute__((__section__(".gen_code")))           \
  88    __attribute__((aligned (32)))
  89#elif defined(_WIN32)
  90/* Maximum alignment for Win32 is 16. */
  91#define code_gen_section                                \
  92    __attribute__((aligned (16)))
  93#else
  94#define code_gen_section                                \
  95    __attribute__((aligned (32)))
  96#endif
  97
  98uint8_t code_gen_prologue[1024] code_gen_section;
  99static uint8_t *code_gen_buffer;
 100static unsigned long code_gen_buffer_size;
 101/* threshold to flush the translated code buffer */
 102static unsigned long code_gen_buffer_max_size;
 103static uint8_t *code_gen_ptr;
 104
 105#if !defined(CONFIG_USER_ONLY)
 106int phys_ram_fd;
 107static int in_migration;
 108
 109RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
 110#endif
 111
 112CPUState *first_cpu;
 113/* current CPU in the current thread. It is only valid inside
 114   cpu_exec() */
 115CPUState *cpu_single_env;
 116/* 0 = Do not count executed instructions.
 117   1 = Precise instruction counting.
 118   2 = Adaptive rate instruction counting.  */
 119int use_icount = 0;
 120/* Current instruction counter.  While executing translated code this may
 121   include some instructions that have not yet been executed.  */
 122int64_t qemu_icount;
 123
 124typedef struct PageDesc {
 125    /* list of TBs intersecting this ram page */
 126    TranslationBlock *first_tb;
 127    /* to optimize handling of self-modifying code, we count the code
 128       write accesses to a given page and switch to a bitmap past a threshold */
 129    unsigned int code_write_count;
 130    uint8_t *code_bitmap;
 131#if defined(CONFIG_USER_ONLY)
 132    unsigned long flags;
 133#endif
 134} PageDesc;
 135
 136/* In system mode we want L1_MAP to be based on ram offsets,
 137   while in user mode we want it to be based on virtual addresses.  */
 138#if !defined(CONFIG_USER_ONLY)
 139#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 140# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 141#else
 142# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 143#endif
 144#else
 145# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
 146#endif
 147
 148/* Size of the L2 (and L3, etc) page tables.  */
 149#define L2_BITS 10
 150#define L2_SIZE (1 << L2_BITS)
 151
 152/* The bits remaining after N lower levels of page tables.  */
 153#define P_L1_BITS_REM \
 154    ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
 155#define V_L1_BITS_REM \
 156    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
 157
 158/* Size of the L1 page table.  Avoid silly small sizes.  */
 159#if P_L1_BITS_REM < 4
 160#define P_L1_BITS  (P_L1_BITS_REM + L2_BITS)
 161#else
 162#define P_L1_BITS  P_L1_BITS_REM
 163#endif
 164
 165#if V_L1_BITS_REM < 4
 166#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
 167#else
 168#define V_L1_BITS  V_L1_BITS_REM
 169#endif
 170
 171#define P_L1_SIZE  ((target_phys_addr_t)1 << P_L1_BITS)
 172#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
 173
 174#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
 175#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
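    /* A page index is therefore split into one top-level field of V_L1_BITS
       (P_L1_BITS for the physical map) selecting an l1_map / l1_phys_map
       slot, followed by V_L1_SHIFT / L2_BITS lower levels of L2_BITS each;
       the *_REM adjustment above only keeps the top level from becoming
       too small.  page_find_alloc() and phys_page_find_alloc() below walk
       the tables in exactly this order. */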
 176
 177unsigned long qemu_real_host_page_size;
 178unsigned long qemu_host_page_bits;
 179unsigned long qemu_host_page_size;
 180unsigned long qemu_host_page_mask;
 181
 182/* This is a multi-level map on the virtual address space.
 183   The bottom level has pointers to PageDesc.  */
 184static void *l1_map[V_L1_SIZE];
 185
 186#if !defined(CONFIG_USER_ONLY)
 187typedef struct PhysPageDesc {
 188    /* offset in host memory of the page + io_index in the low bits */
 189    ram_addr_t phys_offset;
 190    ram_addr_t region_offset;
 191} PhysPageDesc;
 192
 193/* This is a multi-level map on the physical address space.
 194   The bottom level has pointers to PhysPageDesc.  */
 195static void *l1_phys_map[P_L1_SIZE];
 196
 197static void io_mem_init(void);
 198
 199/* io memory support */
 200CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
 201CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
 202void *io_mem_opaque[IO_MEM_NB_ENTRIES];
 203static char io_mem_used[IO_MEM_NB_ENTRIES];
 204static int io_mem_watch;
 205#endif
 206
 207/* log support */
 208#ifdef _WIN32
 209static const char *logfilename = "qemu.log";
 210#else
 211static const char *logfilename = "/tmp/qemu.log";
 212#endif
 213FILE *logfile;
 214int loglevel;
 215static int log_append = 0;
 216
 217/* statistics */
 218#if !defined(CONFIG_USER_ONLY)
 219static int tlb_flush_count;
 220#endif
 221static int tb_flush_count;
 222static int tb_phys_invalidate_count;
 223
 224#ifdef _WIN32
 225static void map_exec(void *addr, long size)
 226{
 227    DWORD old_protect;
 228    VirtualProtect(addr, size,
 229                   PAGE_EXECUTE_READWRITE, &old_protect);
 230    
 231}
 232#else
 233static void map_exec(void *addr, long size)
 234{
 235    unsigned long start, end, page_size;
 236    
 237    page_size = getpagesize();
 238    start = (unsigned long)addr;
 239    start &= ~(page_size - 1);
 240    
 241    end = (unsigned long)addr + size;
 242    end += page_size - 1;
 243    end &= ~(page_size - 1);
 244    
 245    mprotect((void *)start, end - start,
 246             PROT_READ | PROT_WRITE | PROT_EXEC);
 247}
 248#endif
 249
 250static void page_init(void)
 251{
 252    /* NOTE: we can always suppose that qemu_host_page_size >=
 253       TARGET_PAGE_SIZE */
 254#ifdef _WIN32
 255    {
 256        SYSTEM_INFO system_info;
 257
 258        GetSystemInfo(&system_info);
 259        qemu_real_host_page_size = system_info.dwPageSize;
 260    }
 261#else
 262    qemu_real_host_page_size = getpagesize();
 263#endif
 264    if (qemu_host_page_size == 0)
 265        qemu_host_page_size = qemu_real_host_page_size;
 266    if (qemu_host_page_size < TARGET_PAGE_SIZE)
 267        qemu_host_page_size = TARGET_PAGE_SIZE;
 268    qemu_host_page_bits = 0;
 269    while ((1 << qemu_host_page_bits) < qemu_host_page_size)
 270        qemu_host_page_bits++;
 271    qemu_host_page_mask = ~(qemu_host_page_size - 1);
 272
 273#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 274    {
 275#ifdef HAVE_KINFO_GETVMMAP
 276        struct kinfo_vmentry *freep;
 277        int i, cnt;
 278
 279        freep = kinfo_getvmmap(getpid(), &cnt);
 280        if (freep) {
 281            mmap_lock();
 282            for (i = 0; i < cnt; i++) {
 283                unsigned long startaddr, endaddr;
 284
 285                startaddr = freep[i].kve_start;
 286                endaddr = freep[i].kve_end;
 287                if (h2g_valid(startaddr)) {
 288                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 289
 290                    if (h2g_valid(endaddr)) {
 291                        endaddr = h2g(endaddr);
 292                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 293                    } else {
 294#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
 295                        endaddr = ~0ul;
 296                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 297#endif
 298                    }
 299                }
 300            }
 301            free(freep);
 302            mmap_unlock();
 303        }
 304#else
 305        FILE *f;
 306
 307        last_brk = (unsigned long)sbrk(0);
 308
 309        f = fopen("/compat/linux/proc/self/maps", "r");
 310        if (f) {
 311            mmap_lock();
 312
 313            do {
 314                unsigned long startaddr, endaddr;
 315                int n;
 316
 317                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 318
 319                if (n == 2 && h2g_valid(startaddr)) {
 320                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
 321
 322                    if (h2g_valid(endaddr)) {
 323                        endaddr = h2g(endaddr);
 324                    } else {
 325                        endaddr = ~0ul;
 326                    }
 327                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
 328                }
 329            } while (!feof(f));
 330
 331            fclose(f);
 332            mmap_unlock();
 333        }
 334#endif
 335    }
 336#endif
 337}
 338
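    /* Return the PageDesc for a target page index, optionally allocating the
       missing table levels on the way down.  A NULL intermediate pointer
       means the whole subtree is absent; with alloc == 0 the lookup then
       simply fails. */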
 339static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 340{
 341    PageDesc *pd;
 342    void **lp;
 343    int i;
 344
 345#if defined(CONFIG_USER_ONLY)
 346    /* We can't use qemu_malloc because it may recurse into a locked mutex. */
 347# define ALLOC(P, SIZE)                                 \
 348    do {                                                \
 349        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
 350                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
 351    } while (0)
 352#else
 353# define ALLOC(P, SIZE) \
 354    do { P = qemu_mallocz(SIZE); } while (0)
 355#endif
 356
 357    /* Level 1.  Always allocated.  */
 358    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
 359
 360    /* Level 2..N-1.  */
 361    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
 362        void **p = *lp;
 363
 364        if (p == NULL) {
 365            if (!alloc) {
 366                return NULL;
 367            }
 368            ALLOC(p, sizeof(void *) * L2_SIZE);
 369            *lp = p;
 370        }
 371
 372        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
 373    }
 374
 375    pd = *lp;
 376    if (pd == NULL) {
 377        if (!alloc) {
 378            return NULL;
 379        }
 380        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
 381        *lp = pd;
 382    }
 383
 384#undef ALLOC
 385
 386    return pd + (index & (L2_SIZE - 1));
 387}
 388
 389static inline PageDesc *page_find(tb_page_addr_t index)
 390{
 391    return page_find_alloc(index, 0);
 392}
 393
 394#if !defined(CONFIG_USER_ONLY)
 395static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
 396{
 397    PhysPageDesc *pd;
 398    void **lp;
 399    int i;
 400
 401    /* Level 1.  Always allocated.  */
 402    lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
 403
 404    /* Level 2..N-1.  */
 405    for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
 406        void **p = *lp;
 407        if (p == NULL) {
 408            if (!alloc) {
 409                return NULL;
 410            }
 411            *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
 412        }
 413        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
 414    }
 415
 416    pd = *lp;
 417    if (pd == NULL) {
 418        int i;
 419
 420        if (!alloc) {
 421            return NULL;
 422        }
 423
 424        *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
 425
 426        for (i = 0; i < L2_SIZE; i++) {
 427            pd[i].phys_offset = IO_MEM_UNASSIGNED;
 428            pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
 429        }
 430    }
 431
 432    return pd + (index & (L2_SIZE - 1));
 433}
 434
 435static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
 436{
 437    return phys_page_find_alloc(index, 0);
 438}
 439
 440static void tlb_protect_code(ram_addr_t ram_addr);
 441static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
 442                                    target_ulong vaddr);
 443#define mmap_lock() do { } while(0)
 444#define mmap_unlock() do { } while(0)
 445#endif
 446
 447#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
 448
 449#if defined(CONFIG_USER_ONLY)
 450/* Currently it is not recommended to allocate big chunks of data in
 451   user mode. This will change when a dedicated libc is used */
 452#define USE_STATIC_CODE_GEN_BUFFER
 453#endif
 454
 455#ifdef USE_STATIC_CODE_GEN_BUFFER
 456static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 457               __attribute__((aligned (CODE_GEN_ALIGN)));
 458#endif
 459
 460static void code_gen_alloc(unsigned long tb_size)
 461{
 462#ifdef USE_STATIC_CODE_GEN_BUFFER
 463    code_gen_buffer = static_code_gen_buffer;
 464    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 465    map_exec(code_gen_buffer, code_gen_buffer_size);
 466#else
 467    code_gen_buffer_size = tb_size;
 468    if (code_gen_buffer_size == 0) {
 469#if defined(CONFIG_USER_ONLY)
 470        /* in user mode, ram_size is not meaningful */
 471        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 472#else
 473        /* XXX: needs adjustments */
 474        code_gen_buffer_size = (unsigned long)(ram_size / 4);
 475#endif
 476    }
 477    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
 478        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
 479    /* The code gen buffer location may have constraints depending on
 480       the host cpu and OS */
 481#if defined(__linux__) 
 482    {
 483        int flags;
 484        void *start = NULL;
 485
 486        flags = MAP_PRIVATE | MAP_ANONYMOUS;
 487#if defined(__x86_64__)
 488        flags |= MAP_32BIT;
 489        /* Cannot map more than that */
 490        if (code_gen_buffer_size > (800 * 1024 * 1024))
 491            code_gen_buffer_size = (800 * 1024 * 1024);
 492#elif defined(__sparc_v9__)
 493        // Map the buffer below 2G, so we can use direct calls and branches
 494        flags |= MAP_FIXED;
 495        start = (void *) 0x60000000UL;
 496        if (code_gen_buffer_size > (512 * 1024 * 1024))
 497            code_gen_buffer_size = (512 * 1024 * 1024);
 498#elif defined(__arm__)
 499        /* Map the buffer below 32M, so we can use direct calls and branches */
 500        flags |= MAP_FIXED;
 501        start = (void *) 0x01000000UL;
 502        if (code_gen_buffer_size > 16 * 1024 * 1024)
 503            code_gen_buffer_size = 16 * 1024 * 1024;
 504#elif defined(__s390x__)
 505        /* Map the buffer so that we can use direct calls and branches.  */
 506        /* We have a +- 4GB range on the branches; leave some slop.  */
 507        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
 508            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
 509        }
 510        start = (void *)0x90000000UL;
 511#endif
 512        code_gen_buffer = mmap(start, code_gen_buffer_size,
 513                               PROT_WRITE | PROT_READ | PROT_EXEC,
 514                               flags, -1, 0);
 515        if (code_gen_buffer == MAP_FAILED) {
 516            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
 517            exit(1);
 518        }
 519    }
 520#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
 521    || defined(__DragonFly__) || defined(__OpenBSD__)
 522    {
 523        int flags;
 524        void *addr = NULL;
 525        flags = MAP_PRIVATE | MAP_ANONYMOUS;
 526#if defined(__x86_64__)
 527        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
 528         * 0x40000000 is free */
 529        flags |= MAP_FIXED;
 530        addr = (void *)0x40000000;
 531        /* Cannot map more than that */
 532        if (code_gen_buffer_size > (800 * 1024 * 1024))
 533            code_gen_buffer_size = (800 * 1024 * 1024);
 534#elif defined(__sparc_v9__)
 535        // Map the buffer below 2G, so we can use direct calls and branches
 536        flags |= MAP_FIXED;
 537        addr = (void *) 0x60000000UL;
 538        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
 539            code_gen_buffer_size = (512 * 1024 * 1024);
 540        }
 541#endif
 542        code_gen_buffer = mmap(addr, code_gen_buffer_size,
 543                               PROT_WRITE | PROT_READ | PROT_EXEC, 
 544                               flags, -1, 0);
 545        if (code_gen_buffer == MAP_FAILED) {
 546            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
 547            exit(1);
 548        }
 549    }
 550#else
 551    code_gen_buffer = qemu_malloc(code_gen_buffer_size);
 552    map_exec(code_gen_buffer, code_gen_buffer_size);
 553#endif
 554#endif /* !USE_STATIC_CODE_GEN_BUFFER */
 555    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
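        /* Keep a safety margin of roughly one worst-case TB of generated
           code: tb_alloc() refuses to start a new TB once code_gen_ptr has
           passed code_gen_buffer_max_size, so translation cannot run off
           the end of the buffer. */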
 556    code_gen_buffer_max_size = code_gen_buffer_size - 
 557        (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
 558    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
 559    tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
 560}
 561
 562/* Must be called before using the QEMU cpus. 'tb_size' is the size
 563   (in bytes) allocated to the translation buffer. Zero means default
 564   size. */
 565void cpu_exec_init_all(unsigned long tb_size)
 566{
 567    cpu_gen_init();
 568    code_gen_alloc(tb_size);
 569    code_gen_ptr = code_gen_buffer;
 570    page_init();
 571#if !defined(CONFIG_USER_ONLY)
 572    io_mem_init();
 573#endif
 574#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
 575    /* There's no guest base to take into account, so go ahead and
 576       initialize the prologue now.  */
 577    tcg_prologue_init(&tcg_ctx);
 578#endif
 579}
 580
 581#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 582
 583static int cpu_common_post_load(void *opaque, int version_id)
 584{
 585    CPUState *env = opaque;
 586
 587    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 588       version_id is increased. */
 589    env->interrupt_request &= ~0x01;
 590    tlb_flush(env, 1);
 591
 592    return 0;
 593}
 594
 595static const VMStateDescription vmstate_cpu_common = {
 596    .name = "cpu_common",
 597    .version_id = 1,
 598    .minimum_version_id = 1,
 599    .minimum_version_id_old = 1,
 600    .post_load = cpu_common_post_load,
 601    .fields      = (VMStateField []) {
 602        VMSTATE_UINT32(halted, CPUState),
 603        VMSTATE_UINT32(interrupt_request, CPUState),
 604        VMSTATE_END_OF_LIST()
 605    }
 606};
 607#endif
 608
 609CPUState *qemu_get_cpu(int cpu)
 610{
 611    CPUState *env = first_cpu;
 612
 613    while (env) {
 614        if (env->cpu_index == cpu)
 615            break;
 616        env = env->next_cpu;
 617    }
 618
 619    return env;
 620}
 621
 622void cpu_exec_init(CPUState *env)
 623{
 624    CPUState **penv;
 625    int cpu_index;
 626
 627#if defined(CONFIG_USER_ONLY)
 628    cpu_list_lock();
 629#endif
 630    env->next_cpu = NULL;
 631    penv = &first_cpu;
 632    cpu_index = 0;
 633    while (*penv != NULL) {
 634        penv = &(*penv)->next_cpu;
 635        cpu_index++;
 636    }
 637    env->cpu_index = cpu_index;
 638    env->numa_node = 0;
 639    QTAILQ_INIT(&env->breakpoints);
 640    QTAILQ_INIT(&env->watchpoints);
 641    *penv = env;
 642#if defined(CONFIG_USER_ONLY)
 643    cpu_list_unlock();
 644#endif
 645#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 646    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
 647    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 648                    cpu_save, cpu_load, env);
 649#endif
 650}
 651
 652static inline void invalidate_page_bitmap(PageDesc *p)
 653{
 654    if (p->code_bitmap) {
 655        qemu_free(p->code_bitmap);
 656        p->code_bitmap = NULL;
 657    }
 658    p->code_write_count = 0;
 659}
 660
 661/* Set to NULL all the 'first_tb' fields in all PageDescs. */
 662
 663static void page_flush_tb_1 (int level, void **lp)
 664{
 665    int i;
 666
 667    if (*lp == NULL) {
 668        return;
 669    }
 670    if (level == 0) {
 671        PageDesc *pd = *lp;
 672        for (i = 0; i < L2_SIZE; ++i) {
 673            pd[i].first_tb = NULL;
 674            invalidate_page_bitmap(pd + i);
 675        }
 676    } else {
 677        void **pp = *lp;
 678        for (i = 0; i < L2_SIZE; ++i) {
 679            page_flush_tb_1 (level - 1, pp + i);
 680        }
 681    }
 682}
 683
 684static void page_flush_tb(void)
 685{
 686    int i;
 687    for (i = 0; i < V_L1_SIZE; i++) {
 688        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
 689    }
 690}
 691
 692/* flush all the translation blocks */
 693/* XXX: tb_flush is currently not thread safe */
 694void tb_flush(CPUState *env1)
 695{
 696    CPUState *env;
 697#if defined(DEBUG_FLUSH)
 698    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
 699           (unsigned long)(code_gen_ptr - code_gen_buffer),
 700           nb_tbs, nb_tbs > 0 ?
 701           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
 702#endif
 703    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
 704        cpu_abort(env1, "Internal error: code buffer overflow\n");
 705
 706    nb_tbs = 0;
 707
 708    for(env = first_cpu; env != NULL; env = env->next_cpu) {
 709        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
 710    }
 711
 712    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
 713    page_flush_tb();
 714
 715    code_gen_ptr = code_gen_buffer;
 716    /* XXX: flush processor icache at this point if cache flush is
 717       expensive */
 718    tb_flush_count++;
 719}
 720
 721#ifdef DEBUG_TB_CHECK
 722
 723static void tb_invalidate_check(target_ulong address)
 724{
 725    TranslationBlock *tb;
 726    int i;
 727    address &= TARGET_PAGE_MASK;
 728    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
 729        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
 730            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
 731                  address >= tb->pc + tb->size)) {
 732                printf("ERROR invalidate: address=" TARGET_FMT_lx
 733                       " PC=%08lx size=%04x\n",
 734                       address, (long)tb->pc, tb->size);
 735            }
 736        }
 737    }
 738}
 739
 740/* verify that all the pages have correct rights for code */
 741static void tb_page_check(void)
 742{
 743    TranslationBlock *tb;
 744    int i, flags1, flags2;
 745
 746    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
 747        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
 748            flags1 = page_get_flags(tb->pc);
 749            flags2 = page_get_flags(tb->pc + tb->size - 1);
 750            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
 751                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
 752                       (long)tb->pc, tb->size, flags1, flags2);
 753            }
 754        }
 755    }
 756}
 757
 758#endif
 759
 760/* invalidate one TB */
 761static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
 762                             int next_offset)
 763{
 764    TranslationBlock *tb1;
 765    for(;;) {
 766        tb1 = *ptb;
 767        if (tb1 == tb) {
 768            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
 769            break;
 770        }
 771        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
 772    }
 773}
 774
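    /* The page_next[] lists and the jmp_next[]/jmp_first chains tag each
       TranslationBlock pointer with a slot number in its two low bits:
       0 or 1 selects which of the TB's (at most) two pages or jump slots
       the link belongs to, and the value 2 marks the head of a jump chain
       (the TB's own jmp_first).  Hence the recurring
       "n1 = (long)tb1 & 3; tb1 = (TranslationBlock *)((long)tb1 & ~3);". */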
 775static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
 776{
 777    TranslationBlock *tb1;
 778    unsigned int n1;
 779
 780    for(;;) {
 781        tb1 = *ptb;
 782        n1 = (long)tb1 & 3;
 783        tb1 = (TranslationBlock *)((long)tb1 & ~3);
 784        if (tb1 == tb) {
 785            *ptb = tb1->page_next[n1];
 786            break;
 787        }
 788        ptb = &tb1->page_next[n1];
 789    }
 790}
 791
 792static inline void tb_jmp_remove(TranslationBlock *tb, int n)
 793{
 794    TranslationBlock *tb1, **ptb;
 795    unsigned int n1;
 796
 797    ptb = &tb->jmp_next[n];
 798    tb1 = *ptb;
 799    if (tb1) {
 800        /* find tb(n) in circular list */
 801        for(;;) {
 802            tb1 = *ptb;
 803            n1 = (long)tb1 & 3;
 804            tb1 = (TranslationBlock *)((long)tb1 & ~3);
 805            if (n1 == n && tb1 == tb)
 806                break;
 807            if (n1 == 2) {
 808                ptb = &tb1->jmp_first;
 809            } else {
 810                ptb = &tb1->jmp_next[n1];
 811            }
 812        }
 813        /* now we can remove tb(n) from the list */
 814        *ptb = tb->jmp_next[n];
 815
 816        tb->jmp_next[n] = NULL;
 817    }
 818}
 819
 820/* reset the jump entry 'n' of a TB so that it is not chained to
 821   another TB */
 822static inline void tb_reset_jump(TranslationBlock *tb, int n)
 823{
 824    tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
 825}
 826
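    /* Make a TB unreachable: unlink it from the physical hash chain, from
       the per-page TB lists, from every CPU's tb_jmp_cache and from its own
       jump chains, and reset any jump that another TB had patched to point
       at it (tb_reset_jump() repoints such jumps back into their own TB,
       just past the jump, which breaks the chain). */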
 827void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 828{
 829    CPUState *env;
 830    PageDesc *p;
 831    unsigned int h, n1;
 832    tb_page_addr_t phys_pc;
 833    TranslationBlock *tb1, *tb2;
 834
 835    /* remove the TB from the hash list */
 836    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
 837    h = tb_phys_hash_func(phys_pc);
 838    tb_remove(&tb_phys_hash[h], tb,
 839              offsetof(TranslationBlock, phys_hash_next));
 840
 841    /* remove the TB from the page list */
 842    if (tb->page_addr[0] != page_addr) {
 843        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
 844        tb_page_remove(&p->first_tb, tb);
 845        invalidate_page_bitmap(p);
 846    }
 847    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
 848        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
 849        tb_page_remove(&p->first_tb, tb);
 850        invalidate_page_bitmap(p);
 851    }
 852
 853    tb_invalidated_flag = 1;
 854
 855    /* remove the TB from each CPU's tb_jmp_cache */
 856    h = tb_jmp_cache_hash_func(tb->pc);
 857    for(env = first_cpu; env != NULL; env = env->next_cpu) {
 858        if (env->tb_jmp_cache[h] == tb)
 859            env->tb_jmp_cache[h] = NULL;
 860    }
 861
 862    /* remove this TB from the two jump lists */
 863    tb_jmp_remove(tb, 0);
 864    tb_jmp_remove(tb, 1);
 865
 866    /* suppress any remaining jumps to this TB */
 867    tb1 = tb->jmp_first;
 868    for(;;) {
 869        n1 = (long)tb1 & 3;
 870        if (n1 == 2)
 871            break;
 872        tb1 = (TranslationBlock *)((long)tb1 & ~3);
 873        tb2 = tb1->jmp_next[n1];
 874        tb_reset_jump(tb1, n1);
 875        tb1->jmp_next[n1] = NULL;
 876        tb1 = tb2;
 877    }
 878    tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
 879
 880    tb_phys_invalidate_count++;
 881}
 882
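    /* Set bits [start, start + len) in the bit array 'tab', least significant
       bit first within each byte.  For example, set_bits(tab, 3, 7) sets
       bits 3..7 of tab[0] and bits 0..1 of tab[1]. */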
 883static inline void set_bits(uint8_t *tab, int start, int len)
 884{
 885    int end, mask, end1;
 886
 887    end = start + len;
 888    tab += start >> 3;
 889    mask = 0xff << (start & 7);
 890    if ((start & ~7) == (end & ~7)) {
 891        if (start < end) {
 892            mask &= ~(0xff << (end & 7));
 893            *tab |= mask;
 894        }
 895    } else {
 896        *tab++ |= mask;
 897        start = (start + 8) & ~7;
 898        end1 = end & ~7;
 899        while (start < end1) {
 900            *tab++ = 0xff;
 901            start += 8;
 902        }
 903        if (start < end) {
 904            mask = ~(0xff << (end & 7));
 905            *tab |= mask;
 906        }
 907    }
 908}
 909
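    /* Build p->code_bitmap: one bit per byte of the target page, set for every
       byte covered by some TB on this page.  The bitmap lets
       tb_invalidate_phys_page_fast() cheaply ignore writes that do not touch
       translated code. */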
 910static void build_page_bitmap(PageDesc *p)
 911{
 912    int n, tb_start, tb_end;
 913    TranslationBlock *tb;
 914
 915    p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
 916
 917    tb = p->first_tb;
 918    while (tb != NULL) {
 919        n = (long)tb & 3;
 920        tb = (TranslationBlock *)((long)tb & ~3);
 921        /* NOTE: this is subtle as a TB may span two physical pages */
 922        if (n == 0) {
 923            /* NOTE: tb_end may be after the end of the page, but
 924               it is not a problem */
 925            tb_start = tb->pc & ~TARGET_PAGE_MASK;
 926            tb_end = tb_start + tb->size;
 927            if (tb_end > TARGET_PAGE_SIZE)
 928                tb_end = TARGET_PAGE_SIZE;
 929        } else {
 930            tb_start = 0;
 931            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
 932        }
 933        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
 934        tb = tb->page_next[n];
 935    }
 936}
 937
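    /* Translate a new TB starting at pc with the given cs_base/flags/cflags.
       If the TB pool or the code buffer is exhausted, everything is flushed
       and the allocation retried.  A TB whose guest code crosses a page
       boundary is linked into both pages, so invalidating either page also
       invalidates the TB. */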
 938TranslationBlock *tb_gen_code(CPUState *env,
 939                              target_ulong pc, target_ulong cs_base,
 940                              int flags, int cflags)
 941{
 942    TranslationBlock *tb;
 943    uint8_t *tc_ptr;
 944    tb_page_addr_t phys_pc, phys_page2;
 945    target_ulong virt_page2;
 946    int code_gen_size;
 947
 948    phys_pc = get_page_addr_code(env, pc);
 949    tb = tb_alloc(pc);
 950    if (!tb) {
 951        /* flush must be done */
 952        tb_flush(env);
 953        /* cannot fail at this point */
 954        tb = tb_alloc(pc);
 955        /* Don't forget to invalidate previous TB info.  */
 956        tb_invalidated_flag = 1;
 957    }
 958    tc_ptr = code_gen_ptr;
 959    tb->tc_ptr = tc_ptr;
 960    tb->cs_base = cs_base;
 961    tb->flags = flags;
 962    tb->cflags = cflags;
 963    cpu_gen_code(env, tb, &code_gen_size);
 964    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
 965
 966    /* check next page if needed */
 967    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
 968    phys_page2 = -1;
 969    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
 970        phys_page2 = get_page_addr_code(env, virt_page2);
 971    }
 972    tb_link_page(tb, phys_pc, phys_page2);
 973    return tb;
 974}
 975
 976/* invalidate all TBs which intersect with the target physical page
 977   in the range [start, end). NOTE: start and end must refer to
 978   the same physical page. 'is_cpu_write_access' should be true if called
 979   from a real CPU write access: the virtual CPU will exit the current
 980   TB if code is modified inside this TB. */
 981void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
 982                                   int is_cpu_write_access)
 983{
 984    TranslationBlock *tb, *tb_next, *saved_tb;
 985    CPUState *env = cpu_single_env;
 986    tb_page_addr_t tb_start, tb_end;
 987    PageDesc *p;
 988    int n;
 989#ifdef TARGET_HAS_PRECISE_SMC
 990    int current_tb_not_found = is_cpu_write_access;
 991    TranslationBlock *current_tb = NULL;
 992    int current_tb_modified = 0;
 993    target_ulong current_pc = 0;
 994    target_ulong current_cs_base = 0;
 995    int current_flags = 0;
 996#endif /* TARGET_HAS_PRECISE_SMC */
 997
 998    p = page_find(start >> TARGET_PAGE_BITS);
 999    if (!p)
1000        return;
1001    if (!p->code_bitmap &&
1002        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1003        is_cpu_write_access) {
1004        /* build code bitmap */
1005        build_page_bitmap(p);
1006    }
1007
1008    /* we remove all the TBs in the range [start, end) */
1009    /* XXX: see if in some cases it could be faster to invalidate all the code */
1010    tb = p->first_tb;
1011    while (tb != NULL) {
1012        n = (long)tb & 3;
1013        tb = (TranslationBlock *)((long)tb & ~3);
1014        tb_next = tb->page_next[n];
1015        /* NOTE: this is subtle as a TB may span two physical pages */
1016        if (n == 0) {
1017            /* NOTE: tb_end may be after the end of the page, but
1018               it is not a problem */
1019            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1020            tb_end = tb_start + tb->size;
1021        } else {
1022            tb_start = tb->page_addr[1];
1023            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1024        }
1025        if (!(tb_end <= start || tb_start >= end)) {
1026#ifdef TARGET_HAS_PRECISE_SMC
1027            if (current_tb_not_found) {
1028                current_tb_not_found = 0;
1029                current_tb = NULL;
1030                if (env->mem_io_pc) {
1031                    /* now we have a real cpu fault */
1032                    current_tb = tb_find_pc(env->mem_io_pc);
1033                }
1034            }
1035            if (current_tb == tb &&
1036                (current_tb->cflags & CF_COUNT_MASK) != 1) {
1037                /* If we are modifying the current TB, we must stop
1038                its execution. We could be more precise by checking
1039                that the modification is after the current PC, but it
1040                would require a specialized function to partially
1041                restore the CPU state */
1042
1043                current_tb_modified = 1;
1044                cpu_restore_state(current_tb, env,
1045                                  env->mem_io_pc, NULL);
1046                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1047                                     &current_flags);
1048            }
1049#endif /* TARGET_HAS_PRECISE_SMC */
1050            /* we need to do that to handle the case where a signal
1051               occurs while doing tb_phys_invalidate() */
1052            saved_tb = NULL;
1053            if (env) {
1054                saved_tb = env->current_tb;
1055                env->current_tb = NULL;
1056            }
1057            tb_phys_invalidate(tb, -1);
1058            if (env) {
1059                env->current_tb = saved_tb;
1060                if (env->interrupt_request && env->current_tb)
1061                    cpu_interrupt(env, env->interrupt_request);
1062            }
1063        }
1064        tb = tb_next;
1065    }
1066#if !defined(CONFIG_USER_ONLY)
1067    /* if no code remaining, no need to continue to use slow writes */
1068    if (!p->first_tb) {
1069        invalidate_page_bitmap(p);
1070        if (is_cpu_write_access) {
1071            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1072        }
1073    }
1074#endif
1075#ifdef TARGET_HAS_PRECISE_SMC
1076    if (current_tb_modified) {
1077        /* we generate a block containing just the instruction
1078           modifying the memory. It will ensure that it cannot modify
1079           itself */
1080        env->current_tb = NULL;
1081        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1082        cpu_resume_from_signal(env, NULL);
1083    }
1084#endif
1085}
1086
1087/* len must be <= 8 and start must be a multiple of len */
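    /* When the page has a code bitmap, the len bits at the write offset are
       tested with a single shift and mask (they never cross a byte boundary
       because start is len-aligned and len <= 8); the slow
       tb_invalidate_phys_page_range() path is taken only if one of them is
       set, or if no bitmap has been built yet. */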
1088static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1089{
1090    PageDesc *p;
1091    int offset, b;
1092#if 0
1093    if (1) {
1094        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1095                  cpu_single_env->mem_io_vaddr, len,
1096                  cpu_single_env->eip,
1097                  cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1098    }
1099#endif
1100    p = page_find(start >> TARGET_PAGE_BITS);
1101    if (!p)
1102        return;
1103    if (p->code_bitmap) {
1104        offset = start & ~TARGET_PAGE_MASK;
1105        b = p->code_bitmap[offset >> 3] >> (offset & 7);
1106        if (b & ((1 << len) - 1))
1107            goto do_invalidate;
1108    } else {
1109    do_invalidate:
1110        tb_invalidate_phys_page_range(start, start + len, 1);
1111    }
1112}
1113
1114#if !defined(CONFIG_SOFTMMU)
1115static void tb_invalidate_phys_page(tb_page_addr_t addr,
1116                                    unsigned long pc, void *puc)
1117{
1118    TranslationBlock *tb;
1119    PageDesc *p;
1120    int n;
1121#ifdef TARGET_HAS_PRECISE_SMC
1122    TranslationBlock *current_tb = NULL;
1123    CPUState *env = cpu_single_env;
1124    int current_tb_modified = 0;
1125    target_ulong current_pc = 0;
1126    target_ulong current_cs_base = 0;
1127    int current_flags = 0;
1128#endif
1129
1130    addr &= TARGET_PAGE_MASK;
1131    p = page_find(addr >> TARGET_PAGE_BITS);
1132    if (!p)
1133        return;
1134    tb = p->first_tb;
1135#ifdef TARGET_HAS_PRECISE_SMC
1136    if (tb && pc != 0) {
1137        current_tb = tb_find_pc(pc);
1138    }
1139#endif
1140    while (tb != NULL) {
1141        n = (long)tb & 3;
1142        tb = (TranslationBlock *)((long)tb & ~3);
1143#ifdef TARGET_HAS_PRECISE_SMC
1144        if (current_tb == tb &&
1145            (current_tb->cflags & CF_COUNT_MASK) != 1) {
1146                /* If we are modifying the current TB, we must stop
1147                   its execution. We could be more precise by checking
1148                   that the modification is after the current PC, but it
1149                   would require a specialized function to partially
1150                   restore the CPU state */
1151
1152            current_tb_modified = 1;
1153            cpu_restore_state(current_tb, env, pc, puc);
1154            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1155                                 &current_flags);
1156        }
1157#endif /* TARGET_HAS_PRECISE_SMC */
1158        tb_phys_invalidate(tb, addr);
1159        tb = tb->page_next[n];
1160    }
1161    p->first_tb = NULL;
1162#ifdef TARGET_HAS_PRECISE_SMC
1163    if (current_tb_modified) {
1164        /* we generate a block containing just the instruction
1165           modifying the memory. It will ensure that it cannot modify
1166           itself */
1167        env->current_tb = NULL;
1168        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1169        cpu_resume_from_signal(env, puc);
1170    }
1171#endif
1172}
1173#endif
1174
1175/* add the tb in the target page and protect it if necessary */
1176static inline void tb_alloc_page(TranslationBlock *tb,
1177                                 unsigned int n, tb_page_addr_t page_addr)
1178{
1179    PageDesc *p;
1180    TranslationBlock *last_first_tb;
1181
1182    tb->page_addr[n] = page_addr;
1183    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1184    tb->page_next[n] = p->first_tb;
1185    last_first_tb = p->first_tb;
1186    p->first_tb = (TranslationBlock *)((long)tb | n);
1187    invalidate_page_bitmap(p);
1188
1189#if defined(TARGET_HAS_SMC) || 1
1190
1191#if defined(CONFIG_USER_ONLY)
1192    if (p->flags & PAGE_WRITE) {
1193        target_ulong addr;
1194        PageDesc *p2;
1195        int prot;
1196
1197        /* force the host page as non writable (writes will have a
1198           page fault + mprotect overhead) */
1199        page_addr &= qemu_host_page_mask;
1200        prot = 0;
1201        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1202            addr += TARGET_PAGE_SIZE) {
1203
1204            p2 = page_find (addr >> TARGET_PAGE_BITS);
1205            if (!p2)
1206                continue;
1207            prot |= p2->flags;
1208            p2->flags &= ~PAGE_WRITE;
1209        }
1210        mprotect(g2h(page_addr), qemu_host_page_size,
1211                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1212#ifdef DEBUG_TB_INVALIDATE
1213        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1214               page_addr);
1215#endif
1216    }
1217#else
1218    /* if some code is already present, then the pages are already
1219       protected. So we handle the case where only the first TB is
1220       allocated in a physical page */
1221    if (!last_first_tb) {
1222        tlb_protect_code(page_addr);
1223    }
1224#endif
1225
1226#endif /* TARGET_HAS_SMC */
1227}
1228
1229/* Allocate a new translation block. Flush the translation buffer if
1230   too many translation blocks or too much generated code. */
1231TranslationBlock *tb_alloc(target_ulong pc)
1232{
1233    TranslationBlock *tb;
1234
1235    if (nb_tbs >= code_gen_max_blocks ||
1236        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
1237        return NULL;
1238    tb = &tbs[nb_tbs++];
1239    tb->pc = pc;
1240    tb->cflags = 0;
1241    return tb;
1242}
1243
1244void tb_free(TranslationBlock *tb)
1245{
1246    /* In practice this is mostly used for single-use temporary TBs.
1247       Ignore the hard cases and just back up if this TB happens to
1248       be the last one generated.  */
1249    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
1250        code_gen_ptr = tb->tc_ptr;
1251        nb_tbs--;
1252    }
1253}
1254
1255/* add a new TB and link it to the physical page tables. phys_page2 is
1256   (-1) to indicate that only one page contains the TB. */
1257void tb_link_page(TranslationBlock *tb,
1258                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1259{
1260    unsigned int h;
1261    TranslationBlock **ptb;
1262
1263    /* Grab the mmap lock to stop another thread invalidating this TB
1264       before we are done.  */
1265    mmap_lock();
1266    /* add in the physical hash table */
1267    h = tb_phys_hash_func(phys_pc);
1268    ptb = &tb_phys_hash[h];
1269    tb->phys_hash_next = *ptb;
1270    *ptb = tb;
1271
1272    /* add in the page list */
1273    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1274    if (phys_page2 != -1)
1275        tb_alloc_page(tb, 1, phys_page2);
1276    else
1277        tb->page_addr[1] = -1;
1278
1279    tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1280    tb->jmp_next[0] = NULL;
1281    tb->jmp_next[1] = NULL;
1282
1283    /* init original jump addresses */
1284    if (tb->tb_next_offset[0] != 0xffff)
1285        tb_reset_jump(tb, 0);
1286    if (tb->tb_next_offset[1] != 0xffff)
1287        tb_reset_jump(tb, 1);
1288
1289#ifdef DEBUG_TB_CHECK
1290    tb_page_check();
1291#endif
1292    mmap_unlock();
1293}
1294
1295/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1296   tb[1].tc_ptr. Return NULL if not found */
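    /* tbs[] is filled in generation order, so tc_ptr values increase with the
       index and a plain binary search suffices; when tc_ptr points into the
       middle of a block, the loop below ends with m_max at the last TB whose
       tc_ptr is <= tc_ptr, i.e. the block containing it. */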
1297TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1298{
1299    int m_min, m_max, m;
1300    unsigned long v;
1301    TranslationBlock *tb;
1302
1303    if (nb_tbs <= 0)
1304        return NULL;
1305    if (tc_ptr < (unsigned long)code_gen_buffer ||
1306        tc_ptr >= (unsigned long)code_gen_ptr)
1307        return NULL;
1308    /* binary search (cf Knuth) */
1309    m_min = 0;
1310    m_max = nb_tbs - 1;
1311    while (m_min <= m_max) {
1312        m = (m_min + m_max) >> 1;
1313        tb = &tbs[m];
1314        v = (unsigned long)tb->tc_ptr;
1315        if (v == tc_ptr)
1316            return tb;
1317        else if (tc_ptr < v) {
1318            m_max = m - 1;
1319        } else {
1320            m_min = m + 1;
1321        }
1322    }
1323    return &tbs[m_max];
1324}
1325
1326static void tb_reset_jump_recursive(TranslationBlock *tb);
1327
1328static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1329{
1330    TranslationBlock *tb1, *tb_next, **ptb;
1331    unsigned int n1;
1332
1333    tb1 = tb->jmp_next[n];
1334    if (tb1 != NULL) {
1335        /* find head of list */
1336        for(;;) {
1337            n1 = (long)tb1 & 3;
1338            tb1 = (TranslationBlock *)((long)tb1 & ~3);
1339            if (n1 == 2)
1340                break;
1341            tb1 = tb1->jmp_next[n1];
1342        }
1343        /* we are now sure that tb jumps to tb1 */
1344        tb_next = tb1;
1345
1346        /* remove tb from the jmp_first list */
1347        ptb = &tb_next->jmp_first;
1348        for(;;) {
1349            tb1 = *ptb;
1350            n1 = (long)tb1 & 3;
1351            tb1 = (TranslationBlock *)((long)tb1 & ~3);
1352            if (n1 == n && tb1 == tb)
1353                break;
1354            ptb = &tb1->jmp_next[n1];
1355        }
1356        *ptb = tb->jmp_next[n];
1357        tb->jmp_next[n] = NULL;
1358
1359        /* suppress the jump to next tb in generated code */
1360        tb_reset_jump(tb, n);
1361
1362        /* also reset the jumps of the TB we were chained to */
1363        tb_reset_jump_recursive(tb_next);
1364    }
1365}
1366
1367static void tb_reset_jump_recursive(TranslationBlock *tb)
1368{
1369    tb_reset_jump_recursive2(tb, 0);
1370    tb_reset_jump_recursive2(tb, 1);
1371}
1372
1373#if defined(TARGET_HAS_ICE)
1374#if defined(CONFIG_USER_ONLY)
1375static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1376{
1377    tb_invalidate_phys_page_range(pc, pc + 1, 0);
1378}
1379#else
1380static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1381{
1382    target_phys_addr_t addr;
1383    target_ulong pd;
1384    ram_addr_t ram_addr;
1385    PhysPageDesc *p;
1386
1387    addr = cpu_get_phys_page_debug(env, pc);
1388    p = phys_page_find(addr >> TARGET_PAGE_BITS);
1389    if (!p) {
1390        pd = IO_MEM_UNASSIGNED;
1391    } else {
1392        pd = p->phys_offset;
1393    }
1394    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1395    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1396}
1397#endif
1398#endif /* TARGET_HAS_ICE */
1399
1400#if defined(CONFIG_USER_ONLY)
1401void cpu_watchpoint_remove_all(CPUState *env, int mask)
1403{
1404}
1405
1406int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1407                          int flags, CPUWatchpoint **watchpoint)
1408{
1409    return -ENOSYS;
1410}
1411#else
1412/* Add a watchpoint.  */
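    /* len must be a power of two (1, 2, 4 or 8) and addr aligned to it, so
       the watched range can be stored as vaddr plus len_mask = ~(len - 1);
       e.g. len == 4 with a 4-byte-aligned addr watches [addr, addr + 4). */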
1413int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1414                          int flags, CPUWatchpoint **watchpoint)
1415{
1416    target_ulong len_mask = ~(len - 1);
1417    CPUWatchpoint *wp;
1418
1419    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1420    if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1421        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1422                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1423        return -EINVAL;
1424    }
1425    wp = qemu_malloc(sizeof(*wp));
1426
1427    wp->vaddr = addr;
1428    wp->len_mask = len_mask;
1429    wp->flags = flags;
1430
1431    /* keep all GDB-injected watchpoints in front */
1432    if (flags & BP_GDB)
1433        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1434    else
1435        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1436
1437    tlb_flush_page(env, addr);
1438
1439    if (watchpoint)
1440        *watchpoint = wp;
1441    return 0;
1442}
1443
1444/* Remove a specific watchpoint.  */
1445int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1446                          int flags)
1447{
1448    target_ulong len_mask = ~(len - 1);
1449    CPUWatchpoint *wp;
1450
1451    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1452        if (addr == wp->vaddr && len_mask == wp->len_mask
1453                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1454            cpu_watchpoint_remove_by_ref(env, wp);
1455            return 0;
1456        }
1457    }
1458    return -ENOENT;
1459}
1460
1461/* Remove a specific watchpoint by reference.  */
1462void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1463{
1464    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1465
1466    tlb_flush_page(env, watchpoint->vaddr);
1467
1468    qemu_free(watchpoint);
1469}
1470
1471/* Remove all matching watchpoints.  */
1472void cpu_watchpoint_remove_all(CPUState *env, int mask)
1473{
1474    CPUWatchpoint *wp, *next;
1475
1476    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1477        if (wp->flags & mask)
1478            cpu_watchpoint_remove_by_ref(env, wp);
1479    }
1480}
1481#endif
1482
1483/* Add a breakpoint.  */
1484int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1485                          CPUBreakpoint **breakpoint)
1486{
1487#if defined(TARGET_HAS_ICE)
1488    CPUBreakpoint *bp;
1489
1490    bp = qemu_malloc(sizeof(*bp));
1491
1492    bp->pc = pc;
1493    bp->flags = flags;
1494
1495    /* keep all GDB-injected breakpoints in front */
1496    if (flags & BP_GDB)
1497        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1498    else
1499        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1500
1501    breakpoint_invalidate(env, pc);
1502
1503    if (breakpoint)
1504        *breakpoint = bp;
1505    return 0;
1506#else
1507    return -ENOSYS;
1508#endif
1509}
1510
1511/* Remove a specific breakpoint.  */
1512int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1513{
1514#if defined(TARGET_HAS_ICE)
1515    CPUBreakpoint *bp;
1516
1517    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1518        if (bp->pc == pc && bp->flags == flags) {
1519            cpu_breakpoint_remove_by_ref(env, bp);
1520            return 0;
1521        }
1522    }
1523    return -ENOENT;
1524#else
1525    return -ENOSYS;
1526#endif
1527}
1528
1529/* Remove a specific breakpoint by reference.  */
1530void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1531{
1532#if defined(TARGET_HAS_ICE)
1533    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1534
1535    breakpoint_invalidate(env, breakpoint->pc);
1536
1537    qemu_free(breakpoint);
1538#endif
1539}
1540
1541/* Remove all matching breakpoints. */
1542void cpu_breakpoint_remove_all(CPUState *env, int mask)
1543{
1544#if defined(TARGET_HAS_ICE)
1545    CPUBreakpoint *bp, *next;
1546
1547    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1548        if (bp->flags & mask)
1549            cpu_breakpoint_remove_by_ref(env, bp);
1550    }
1551#endif
1552}
1553
1554/* enable or disable single step mode. EXCP_DEBUG is returned by the
1555   CPU loop after each instruction */
1556void cpu_single_step(CPUState *env, int enabled)
1557{
1558#if defined(TARGET_HAS_ICE)
1559    if (env->singlestep_enabled != enabled) {
1560        env->singlestep_enabled = enabled;
1561        if (kvm_enabled())
1562            kvm_update_guest_debug(env, 0);
1563        else {
1564            /* must flush all the translated code to avoid inconsistencies */
1565            /* XXX: only flush what is necessary */
1566            tb_flush(env);
1567        }
1568    }
1569#endif
1570}
1571
1572/* enable or disable low-level logging */
1573void cpu_set_log(int log_flags)
1574{
1575    loglevel = log_flags;
1576    if (loglevel && !logfile) {
1577        logfile = fopen(logfilename, log_append ? "a" : "w");
1578        if (!logfile) {
1579            perror(logfilename);
1580            _exit(1);
1581        }
1582#if !defined(CONFIG_SOFTMMU)
1583        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1584        {
1585            static char logfile_buf[4096];
1586            setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1587        }
1588#elif !defined(_WIN32)
1589        /* Win32 doesn't support line-buffering and requires size >= 2 */
1590        setvbuf(logfile, NULL, _IOLBF, 0);
1591#endif
1592        log_append = 1;
1593    }
1594    if (!loglevel && logfile) {
1595        fclose(logfile);
1596        logfile = NULL;
1597    }
1598}
1599
1600void cpu_set_log_filename(const char *filename)
1601{
1602    logfilename = strdup(filename);
1603    if (logfile) {
1604        fclose(logfile);
1605        logfile = NULL;
1606    }
1607    cpu_set_log(loglevel);
1608}
1609
1610static void cpu_unlink_tb(CPUState *env)
1611{
1612    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
1613       problem and hope the cpu will stop of its own accord.  For userspace
1614       emulation this often isn't actually as bad as it sounds.  Often
1615       signals are used primarily to interrupt blocking syscalls.  */
1616    TranslationBlock *tb;
1617    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1618
1619    spin_lock(&interrupt_lock);
1620    tb = env->current_tb;
1621    /* if the cpu is currently executing code, we must unlink it and
1622       all the potentially executing TB */
1623    if (tb) {
1624        env->current_tb = NULL;
1625        tb_reset_jump_recursive(tb);
1626    }
1627    spin_unlock(&interrupt_lock);
1628}
1629
1630/* mask must never be zero, except for A20 change call */
1631void cpu_interrupt(CPUState *env, int mask)
1632{
1633    int old_mask;
1634
1635    old_mask = env->interrupt_request;
1636    env->interrupt_request |= mask;
1637
1638#ifndef CONFIG_USER_ONLY
1639    /*
1640     * If called from iothread context, wake the target cpu in
1641     * case it is halted.
1642     */
1643    if (!qemu_cpu_self(env)) {
1644        qemu_cpu_kick(env);
1645        return;
1646    }
1647#endif
1648
1649    if (use_icount) {
1650        env->icount_decr.u16.high = 0xffff;
1651#ifndef CONFIG_USER_ONLY
1652        if (!can_do_io(env)
1653            && (mask & ~old_mask) != 0) {
1654            cpu_abort(env, "Raised interrupt while not in I/O function");
1655        }
1656#endif
1657    } else {
1658        cpu_unlink_tb(env);
1659    }
1660}
1661
1662void cpu_reset_interrupt(CPUState *env, int mask)
1663{
1664    env->interrupt_request &= ~mask;
1665}
1666
1667void cpu_exit(CPUState *env)
1668{
1669    env->exit_request = 1;
1670    cpu_unlink_tb(env);
1671}
1672
1673const CPULogItem cpu_log_items[] = {
1674    { CPU_LOG_TB_OUT_ASM, "out_asm",
1675      "show generated host assembly code for each compiled TB" },
1676    { CPU_LOG_TB_IN_ASM, "in_asm",
1677      "show target assembly code for each compiled TB" },
1678    { CPU_LOG_TB_OP, "op",
1679      "show micro ops for each compiled TB" },
1680    { CPU_LOG_TB_OP_OPT, "op_opt",
1681      "show micro ops "
1682#ifdef TARGET_I386
1683      "before eflags optimization and "
1684#endif
1685      "after liveness analysis" },
1686    { CPU_LOG_INT, "int",
1687      "show interrupts/exceptions in short format" },
1688    { CPU_LOG_EXEC, "exec",
1689      "show trace before each executed TB (lots of logs)" },
1690    { CPU_LOG_TB_CPU, "cpu",
1691      "show CPU state before block translation" },
1692#ifdef TARGET_I386
1693    { CPU_LOG_PCALL, "pcall",
1694      "show protected mode far calls/returns/exceptions" },
1695    { CPU_LOG_RESET, "cpu_reset",
1696      "show CPU state before CPU resets" },
1697#endif
1698#ifdef DEBUG_IOPORT
1699    { CPU_LOG_IOPORT, "ioport",
1700      "show all i/o ports accesses" },
1701#endif
1702    { 0, NULL, NULL },
1703};
1704
1705#ifndef CONFIG_USER_ONLY
1706static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1707    = QLIST_HEAD_INITIALIZER(memory_client_list);
1708
1709static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1710                                  ram_addr_t size,
1711                                  ram_addr_t phys_offset)
1712{
1713    CPUPhysMemoryClient *client;
1714    QLIST_FOREACH(client, &memory_client_list, list) {
1715        client->set_memory(client, start_addr, size, phys_offset);
1716    }
1717}
1718
1719static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1720                                        target_phys_addr_t end)
1721{
1722    CPUPhysMemoryClient *client;
1723    QLIST_FOREACH(client, &memory_client_list, list) {
1724        int r = client->sync_dirty_bitmap(client, start, end);
1725        if (r < 0)
1726            return r;
1727    }
1728    return 0;
1729}
1730
1731static int cpu_notify_migration_log(int enable)
1732{
1733    CPUPhysMemoryClient *client;
1734    QLIST_FOREACH(client, &memory_client_list, list) {
1735        int r = client->migration_log(client, enable);
1736        if (r < 0)
1737            return r;
1738    }
1739    return 0;
1740}
1741
1742static void phys_page_for_each_1(CPUPhysMemoryClient *client,
1743                                 int level, void **lp)
1744{
1745    int i;
1746
1747    if (*lp == NULL) {
1748        return;
1749    }
1750    if (level == 0) {
1751        PhysPageDesc *pd = *lp;
1752        for (i = 0; i < L2_SIZE; ++i) {
1753            if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1754                client->set_memory(client, pd[i].region_offset,
1755                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
1756            }
1757        }
1758    } else {
1759        void **pp = *lp;
1760        for (i = 0; i < L2_SIZE; ++i) {
1761            phys_page_for_each_1(client, level - 1, pp + i);
1762        }
1763    }
1764}
1765
1766static void phys_page_for_each(CPUPhysMemoryClient *client)
1767{
1768    int i;
1769    for (i = 0; i < P_L1_SIZE; ++i) {
1770        phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1771                             l1_phys_map + i);
1772    }
1773}
1774
1775void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1776{
1777    QLIST_INSERT_HEAD(&memory_client_list, client, list);
1778    phys_page_for_each(client);
1779}
1780
1781void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1782{
1783    QLIST_REMOVE(client, list);
1784}
1785#endif
1786
1787static int cmp1(const char *s1, int n, const char *s2)
1788{
1789    if (strlen(s2) != n)
1790        return 0;
1791    return memcmp(s1, s2, n) == 0;
1792}
1793
1794/* takes a comma-separated list of log masks. Returns 0 on error. */
1795int cpu_str_to_log_mask(const char *str)
1796{
1797    const CPULogItem *item;
1798    int mask;
1799    const char *p, *p1;
1800
1801    p = str;
1802    mask = 0;
1803    for(;;) {
1804        p1 = strchr(p, ',');
1805        if (!p1)
1806            p1 = p + strlen(p);
1807        if(cmp1(p,p1-p,"all")) {
1808            for(item = cpu_log_items; item->mask != 0; item++) {
1809                mask |= item->mask;
1810            }
1811        } else {
1812            for(item = cpu_log_items; item->mask != 0; item++) {
1813                if (cmp1(p, p1 - p, item->name))
1814                    goto found;
1815            }
1816            return 0;
1817        }
1818    found:
1819        mask |= item->mask;
1820        if (*p1 != ',')
1821            break;
1822        p = p1 + 1;
1823    }
1824    return mask;
1825}
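
    /* Illustrative usage (editor's sketch, not part of the original source):
       given the cpu_log_items table above, cpu_str_to_log_mask("in_asm,int")
       returns CPU_LOG_TB_IN_ASM | CPU_LOG_INT, "all" ORs every entry
       together, and an unknown name makes the whole call return 0.  A caller
       would typically do something like (log_str is a placeholder for the
       user-supplied string):

           int mask = cpu_str_to_log_mask(log_str);
           if (mask) {
               cpu_set_log(mask);
           }
    */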
1826
1827void cpu_abort(CPUState *env, const char *fmt, ...)
1828{
1829    va_list ap;
1830    va_list ap2;
1831
1832    va_start(ap, fmt);
1833    va_copy(ap2, ap);
1834    fprintf(stderr, "qemu: fatal: ");
1835    vfprintf(stderr, fmt, ap);
1836    fprintf(stderr, "\n");
1837#ifdef TARGET_I386
1838    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1839#else
1840    cpu_dump_state(env, stderr, fprintf, 0);
1841#endif
1842    if (qemu_log_enabled()) {
1843        qemu_log("qemu: fatal: ");
1844        qemu_log_vprintf(fmt, ap2);
1845        qemu_log("\n");
1846#ifdef TARGET_I386
1847        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1848#else
1849        log_cpu_state(env, 0);
1850#endif
1851        qemu_log_flush();
1852        qemu_log_close();
1853    }
1854    va_end(ap2);
1855    va_end(ap);
1856#if defined(CONFIG_USER_ONLY)
1857    {
1858        struct sigaction act;
1859        sigfillset(&act.sa_mask);
1860        act.sa_handler = SIG_DFL;
1861        sigaction(SIGABRT, &act, NULL);
1862    }
1863#endif
1864    abort();
1865}
1866
1867CPUState *cpu_copy(CPUState *env)
1868{
1869    CPUState *new_env = cpu_init(env->cpu_model_str);
1870    CPUState *next_cpu = new_env->next_cpu;
1871    int cpu_index = new_env->cpu_index;
1872#if defined(TARGET_HAS_ICE)
1873    CPUBreakpoint *bp;
1874    CPUWatchpoint *wp;
1875#endif
1876
1877    memcpy(new_env, env, sizeof(CPUState));
1878
1879    /* Preserve chaining and index. */
1880    new_env->next_cpu = next_cpu;
1881    new_env->cpu_index = cpu_index;
1882
1883    /* Clone all break/watchpoints.
1884       Note: Once we support ptrace with hw-debug register access, make sure
1885       BP_CPU break/watchpoints are handled correctly on clone. */
1886    QTAILQ_INIT(&new_env->breakpoints);
1887    QTAILQ_INIT(&new_env->watchpoints);
1888#if defined(TARGET_HAS_ICE)
1889    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1890        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1891    }
1892    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1893        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1894                              wp->flags, NULL);
1895    }
1896#endif
1897
1898    return new_env;
1899}
1900
1901#if !defined(CONFIG_USER_ONLY)
1902
1903static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1904{
1905    unsigned int i;
1906
1907    /* Discard jump cache entries for any tb which might potentially
1908       overlap the flushed page.  */
1909    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1910    memset (&env->tb_jmp_cache[i], 0, 
1911            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1912
1913    i = tb_jmp_cache_hash_page(addr);
1914    memset (&env->tb_jmp_cache[i], 0, 
1915            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1916}
1917
1918static CPUTLBEntry s_cputlb_empty_entry = {
1919    .addr_read  = -1,
1920    .addr_write = -1,
1921    .addr_code  = -1,
1922    .addend     = -1,
1923};
1924
1925/* NOTE: if flush_global is true, also flush global entries (not
1926   implemented yet) */
1927void tlb_flush(CPUState *env, int flush_global)
1928{
1929    int i;
1930
1931#if defined(DEBUG_TLB)
1932    printf("tlb_flush:\n");
1933#endif
1934    /* must reset current TB so that interrupts cannot modify the
1935       links while we are modifying them */
1936    env->current_tb = NULL;
1937
1938    for(i = 0; i < CPU_TLB_SIZE; i++) {
1939        int mmu_idx;
1940        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1941            env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1942        }
1943    }
1944
1945    memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1946
1947    env->tlb_flush_addr = -1;
1948    env->tlb_flush_mask = 0;
1949    tlb_flush_count++;
1950}
1951
1952static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1953{
1954    if (addr == (tlb_entry->addr_read &
1955                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1956        addr == (tlb_entry->addr_write &
1957                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1958        addr == (tlb_entry->addr_code &
1959                 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1960        *tlb_entry = s_cputlb_empty_entry;
1961    }
1962}
1963
1964void tlb_flush_page(CPUState *env, target_ulong addr)
1965{
1966    int i;
1967    int mmu_idx;
1968
1969#if defined(DEBUG_TLB)
1970    printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1971#endif
1972    /* Check if we need to flush due to large pages.  */
1973    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1974#if defined(DEBUG_TLB)
1975        printf("tlb_flush_page: forced full flush ("
1976               TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1977               env->tlb_flush_addr, env->tlb_flush_mask);
1978#endif
1979        tlb_flush(env, 1);
1980        return;
1981    }
1982    /* must reset current TB so that interrupts cannot modify the
1983       links while we are modifying them */
1984    env->current_tb = NULL;
1985
1986    addr &= TARGET_PAGE_MASK;
1987    i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
1988    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
1989        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
1990
1991    tlb_flush_jmp_cache(env, addr);
1992}
1993
1994/* update the TLBs so that writes to code in the virtual page 'addr'
1995   can be detected */
1996static void tlb_protect_code(ram_addr_t ram_addr)
1997{
1998    cpu_physical_memory_reset_dirty(ram_addr,
1999                                    ram_addr + TARGET_PAGE_SIZE,
2000                                    CODE_DIRTY_FLAG);
2001}
2002
2003/* update the TLB so that writes in the physical page 'ram_addr' are no longer
2004   tested for self-modifying code */
2005static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2006                                    target_ulong vaddr)
2007{
2008    cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2009}
2010
2011static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2012                                         unsigned long start, unsigned long length)
2013{
2014    unsigned long addr;
2015    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2016        addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2017        if ((addr - start) < length) {
2018            tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2019        }
2020    }
2021}
2022
2023/* Note: start and end must be within the same ram block.  */
2024void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2025                                     int dirty_flags)
2026{
2027    CPUState *env;
2028    unsigned long length, start1;
2029    int i;
2030
2031    start &= TARGET_PAGE_MASK;
2032    end = TARGET_PAGE_ALIGN(end);
2033
2034    length = end - start;
2035    if (length == 0)
2036        return;
2037    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2038
2039    /* we modify the TLB cache so that the dirty bit will be set again
2040       when accessing the range */
2041    start1 = (unsigned long)qemu_safe_ram_ptr(start);
2042    /* Check that we don't span multiple blocks - this breaks the
2043       address comparisons below.  */
2044    if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2045            != (end - 1) - start) {
2046        abort();
2047    }
2048
2049    for(env = first_cpu; env != NULL; env = env->next_cpu) {
2050        int mmu_idx;
2051        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2052            for(i = 0; i < CPU_TLB_SIZE; i++)
2053                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2054                                      start1, length);
2055        }
2056    }
2057}
2058
2059int cpu_physical_memory_set_dirty_tracking(int enable)
2060{
2061    int ret = 0;
2062    in_migration = enable;
2063    ret = cpu_notify_migration_log(!!enable);
2064    return ret;
2065}
2066
2067int cpu_physical_memory_get_dirty_tracking(void)
2068{
2069    return in_migration;
2070}
2071
2072int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2073                                   target_phys_addr_t end_addr)
2074{
2075    int ret;
2076
2077    ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2078    return ret;
2079}
2080
2081static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2082{
2083    ram_addr_t ram_addr;
2084    void *p;
2085
2086    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2087        p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2088            + tlb_entry->addend);
2089        ram_addr = qemu_ram_addr_from_host_nofail(p);
2090        if (!cpu_physical_memory_is_dirty(ram_addr)) {
2091            tlb_entry->addr_write |= TLB_NOTDIRTY;
2092        }
2093    }
2094}
2095
2096/* update the TLB according to the current state of the dirty bits */
2097void cpu_tlb_update_dirty(CPUState *env)
2098{
2099    int i;
2100    int mmu_idx;
2101    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2102        for(i = 0; i < CPU_TLB_SIZE; i++)
2103            tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2104    }
2105}
2106
2107static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2108{
2109    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2110        tlb_entry->addr_write = vaddr;
2111}
2112
2113/* update the TLB corresponding to virtual page vaddr
2114   so that it is no longer dirty */
2115static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2116{
2117    int i;
2118    int mmu_idx;
2119
2120    vaddr &= TARGET_PAGE_MASK;
2121    i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2122    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2123        tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2124}
2125
2126/* Our TLB does not support large pages, so remember the area covered by
2127   large pages and trigger a full TLB flush if these are invalidated.  */
2128static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2129                               target_ulong size)
2130{
2131    target_ulong mask = ~(size - 1);
2132
2133    if (env->tlb_flush_addr == (target_ulong)-1) {
2134        env->tlb_flush_addr = vaddr & mask;
2135        env->tlb_flush_mask = mask;
2136        return;
2137    }
2138    /* Extend the existing region to include the new page.
2139       This is a compromise between unnecessary flushes and the cost
2140       of maintaining a full variable size TLB.  */
2141    mask &= env->tlb_flush_mask;
2142    while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2143        mask <<= 1;
2144    }
2145    env->tlb_flush_addr &= mask;
2146    env->tlb_flush_mask = mask;
2147}
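
    /* Worked example (editor's sketch, not part of the original source),
       assuming 2 MB large pages: tlb_add_large_page(env, 0x40000000,
       0x200000) records tlb_flush_addr=0x40000000, tlb_flush_mask=0xffe00000.
       A second call with vaddr=0x40300000 widens the mask one step, since
       (0x40000000 ^ 0x40300000) & 0xffe00000 != 0, ending up with
       addr=0x40000000, mask=0xffc00000: a single coarser 4 MB region
       covering both pages instead of a per-page list. */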
2148
2149/* Add a new TLB entry. At most one entry for a given virtual address
2150   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2151   supplied size is only used by tlb_flush_page.  */
2152void tlb_set_page(CPUState *env, target_ulong vaddr,
2153                  target_phys_addr_t paddr, int prot,
2154                  int mmu_idx, target_ulong size)
2155{
2156    PhysPageDesc *p;
2157    unsigned long pd;
2158    unsigned int index;
2159    target_ulong address;
2160    target_ulong code_address;
2161    unsigned long addend;
2162    CPUTLBEntry *te;
2163    CPUWatchpoint *wp;
2164    target_phys_addr_t iotlb;
2165
2166    assert(size >= TARGET_PAGE_SIZE);
2167    if (size != TARGET_PAGE_SIZE) {
2168        tlb_add_large_page(env, vaddr, size);
2169    }
2170    p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2171    if (!p) {
2172        pd = IO_MEM_UNASSIGNED;
2173    } else {
2174        pd = p->phys_offset;
2175    }
2176#if defined(DEBUG_TLB)
2177    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2178           " prot=%x idx=%d pd=0x%08lx\n",
2179           vaddr, paddr, prot, mmu_idx, pd);
2180#endif
2181
2182    address = vaddr;
2183    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2184        /* IO memory case (romd handled later) */
2185        address |= TLB_MMIO;
2186    }
2187    addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2188    if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2189        /* Normal RAM.  */
2190        iotlb = pd & TARGET_PAGE_MASK;
2191        if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2192            iotlb |= IO_MEM_NOTDIRTY;
2193        else
2194            iotlb |= IO_MEM_ROM;
2195    } else {
2196        /* IO handlers are currently passed a physical address.
2197           It would be nice to pass an offset from the base address
2198           of that region.  This would avoid having to special case RAM,
2199           and avoid full address decoding in every device.
2200           We can't use the high bits of pd for this because
2201           IO_MEM_ROMD uses these as a ram address.  */
2202        iotlb = (pd & ~TARGET_PAGE_MASK);
2203        if (p) {
2204            iotlb += p->region_offset;
2205        } else {
2206            iotlb += paddr;
2207        }
2208    }
2209
2210    code_address = address;
2211    /* Make accesses to pages with watchpoints go via the
2212       watchpoint trap routines.  */
2213    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2214        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2215            /* Avoid trapping reads of pages with a write breakpoint. */
2216            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2217                iotlb = io_mem_watch + paddr;
2218                address |= TLB_MMIO;
2219                break;
2220            }
2221        }
2222    }
2223
2224    index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2225    env->iotlb[mmu_idx][index] = iotlb - vaddr;
2226    te = &env->tlb_table[mmu_idx][index];
2227    te->addend = addend - vaddr;
2228    if (prot & PAGE_READ) {
2229        te->addr_read = address;
2230    } else {
2231        te->addr_read = -1;
2232    }
2233
2234    if (prot & PAGE_EXEC) {
2235        te->addr_code = code_address;
2236    } else {
2237        te->addr_code = -1;
2238    }
2239    if (prot & PAGE_WRITE) {
2240        if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2241            (pd & IO_MEM_ROMD)) {
2242            /* Write access calls the I/O callback.  */
2243            te->addr_write = address | TLB_MMIO;
2244        } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2245                   !cpu_physical_memory_is_dirty(pd)) {
2246            te->addr_write = address | TLB_NOTDIRTY;
2247        } else {
2248            te->addr_write = address;
2249        }
2250    } else {
2251        te->addr_write = -1;
2252    }
2253}
2254
2255#else
2256
2257void tlb_flush(CPUState *env, int flush_global)
2258{
2259}
2260
2261void tlb_flush_page(CPUState *env, target_ulong addr)
2262{
2263}
2264
2265/*
2266 * Walks guest process memory "regions" one by one
2267 * and calls callback function 'fn' for each region.
2268 */
2269
2270struct walk_memory_regions_data
2271{
2272    walk_memory_regions_fn fn;
2273    void *priv;
2274    unsigned long start;
2275    int prot;
2276};
2277
2278static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2279                                   abi_ulong end, int new_prot)
2280{
2281    if (data->start != -1ul) {
2282        int rc = data->fn(data->priv, data->start, end, data->prot);
2283        if (rc != 0) {
2284            return rc;
2285        }
2286    }
2287
2288    data->start = (new_prot ? end : -1ul);
2289    data->prot = new_prot;
2290
2291    return 0;
2292}
2293
2294static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2295                                 abi_ulong base, int level, void **lp)
2296{
2297    abi_ulong pa;
2298    int i, rc;
2299
2300    if (*lp == NULL) {
2301        return walk_memory_regions_end(data, base, 0);
2302    }
2303
2304    if (level == 0) {
2305        PageDesc *pd = *lp;
2306        for (i = 0; i < L2_SIZE; ++i) {
2307            int prot = pd[i].flags;
2308
2309            pa = base | (i << TARGET_PAGE_BITS);
2310            if (prot != data->prot) {
2311                rc = walk_memory_regions_end(data, pa, prot);
2312                if (rc != 0) {
2313                    return rc;
2314                }
2315            }
2316        }
2317    } else {
2318        void **pp = *lp;
2319        for (i = 0; i < L2_SIZE; ++i) {
2320            pa = base | ((abi_ulong)i <<
2321                (TARGET_PAGE_BITS + L2_BITS * level));
2322            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2323            if (rc != 0) {
2324                return rc;
2325            }
2326        }
2327    }
2328
2329    return 0;
2330}
2331
2332int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2333{
2334    struct walk_memory_regions_data data;
2335    unsigned long i;
2336
2337    data.fn = fn;
2338    data.priv = priv;
2339    data.start = -1ul;
2340    data.prot = 0;
2341
2342    for (i = 0; i < V_L1_SIZE; i++) {
2343        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2344                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2345        if (rc != 0) {
2346            return rc;
2347        }
2348    }
2349
2350    return walk_memory_regions_end(&data, 0, 0);
2351}
2352
2353static int dump_region(void *priv, abi_ulong start,
2354    abi_ulong end, unsigned long prot)
2355{
2356    FILE *f = (FILE *)priv;
2357
2358    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2359        " "TARGET_ABI_FMT_lx" %c%c%c\n",
2360        start, end, end - start,
2361        ((prot & PAGE_READ) ? 'r' : '-'),
2362        ((prot & PAGE_WRITE) ? 'w' : '-'),
2363        ((prot & PAGE_EXEC) ? 'x' : '-'));
2364
2365    return (0);
2366}
2367
2368/* dump memory mappings */
2369void page_dump(FILE *f)
2370{
2371    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2372            "start", "end", "size", "prot");
2373    walk_memory_regions(f, dump_region);
2374}
2375
2376int page_get_flags(target_ulong address)
2377{
2378    PageDesc *p;
2379
2380    p = page_find(address >> TARGET_PAGE_BITS);
2381    if (!p)
2382        return 0;
2383    return p->flags;
2384}
2385
2386/* Modify the flags of a page and invalidate the code if necessary.
2387   The flag PAGE_WRITE_ORG is positioned automatically depending
2388   on PAGE_WRITE.  The mmap_lock should already be held.  */
2389void page_set_flags(target_ulong start, target_ulong end, int flags)
2390{
2391    target_ulong addr, len;
2392
2393    /* This function should never be called with addresses outside the
2394       guest address space.  If this assert fires, it probably indicates
2395       a missing call to h2g_valid.  */
2396#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2397    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2398#endif
2399    assert(start < end);
2400
2401    start = start & TARGET_PAGE_MASK;
2402    end = TARGET_PAGE_ALIGN(end);
2403
2404    if (flags & PAGE_WRITE) {
2405        flags |= PAGE_WRITE_ORG;
2406    }
2407
2408    for (addr = start, len = end - start;
2409         len != 0;
2410         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2411        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2412
2413        /* If the write protection bit is set, then we invalidate
2414           the code inside.  */
2415        if (!(p->flags & PAGE_WRITE) &&
2416            (flags & PAGE_WRITE) &&
2417            p->first_tb) {
2418            tb_invalidate_phys_page(addr, 0, NULL);
2419        }
2420        p->flags = flags;
2421    }
2422}
2423
2424int page_check_range(target_ulong start, target_ulong len, int flags)
2425{
2426    PageDesc *p;
2427    target_ulong end;
2428    target_ulong addr;
2429
2430    /* This function should never be called with addresses outside the
2431       guest address space.  If this assert fires, it probably indicates
2432       a missing call to h2g_valid.  */
2433#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2434    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2435#endif
2436
2437    if (len == 0) {
2438        return 0;
2439    }
2440    if (start + len - 1 < start) {
2441        /* We've wrapped around.  */
2442        return -1;
2443    }
2444
2445    end = TARGET_PAGE_ALIGN(start+len); /* must do this before we lose bits in the next step */
2446    start = start & TARGET_PAGE_MASK;
2447
2448    for (addr = start, len = end - start;
2449         len != 0;
2450         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2451        p = page_find(addr >> TARGET_PAGE_BITS);
2452        if (!p)
2453            return -1;
2454        if (!(p->flags & PAGE_VALID))
2455            return -1;
2456
2457        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2458            return -1;
2459        if (flags & PAGE_WRITE) {
2460            if (!(p->flags & PAGE_WRITE_ORG))
2461                return -1;
2462            /* unprotect the page if it was put read-only because it
2463               contains translated code */
2464            if (!(p->flags & PAGE_WRITE)) {
2465                if (!page_unprotect(addr, 0, NULL))
2466                    return -1;
2467            }
2468            return 0;
2469        }
2470    }
2471    return 0;
2472}
2473
2474/* called from signal handler: invalidate the code and unprotect the
2475   page. Return TRUE if the fault was successfully handled. */
2476int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2477{
2478    unsigned int prot;
2479    PageDesc *p;
2480    target_ulong host_start, host_end, addr;
2481
2482    /* Technically this isn't safe inside a signal handler.  However we
2483       know this only ever happens in a synchronous SEGV handler, so in
2484       practice it seems to be ok.  */
2485    mmap_lock();
2486
2487    p = page_find(address >> TARGET_PAGE_BITS);
2488    if (!p) {
2489        mmap_unlock();
2490        return 0;
2491    }
2492
2493    /* if the page was really writable, then we change its
2494       protection back to writable */
2495    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2496        host_start = address & qemu_host_page_mask;
2497        host_end = host_start + qemu_host_page_size;
2498
2499        prot = 0;
2500        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2501            p = page_find(addr >> TARGET_PAGE_BITS);
2502            p->flags |= PAGE_WRITE;
2503            prot |= p->flags;
2504
2505            /* and since the content will be modified, we must invalidate
2506               the corresponding translated code. */
2507            tb_invalidate_phys_page(addr, pc, puc);
2508#ifdef DEBUG_TB_CHECK
2509            tb_invalidate_check(addr);
2510#endif
2511        }
2512        mprotect((void *)g2h(host_start), qemu_host_page_size,
2513                 prot & PAGE_BITS);
2514
2515        mmap_unlock();
2516        return 1;
2517    }
2518    mmap_unlock();
2519    return 0;
2520}
2521
2522static inline void tlb_set_dirty(CPUState *env,
2523                                 unsigned long addr, target_ulong vaddr)
2524{
2525}
2526#endif /* defined(CONFIG_USER_ONLY) */
2527
2528#if !defined(CONFIG_USER_ONLY)
2529
2530#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2531typedef struct subpage_t {
2532    target_phys_addr_t base;
2533    ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2534    ram_addr_t region_offset[TARGET_PAGE_SIZE];
2535} subpage_t;
2536
2537static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2538                             ram_addr_t memory, ram_addr_t region_offset);
2539static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2540                                ram_addr_t orig_memory,
2541                                ram_addr_t region_offset);
2542#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2543                      need_subpage)                                     \
2544    do {                                                                \
2545        if (addr > start_addr)                                          \
2546            start_addr2 = 0;                                            \
2547        else {                                                          \
2548            start_addr2 = start_addr & ~TARGET_PAGE_MASK;               \
2549            if (start_addr2 > 0)                                        \
2550                need_subpage = 1;                                       \
2551        }                                                               \
2552                                                                        \
2553        if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE)        \
2554            end_addr2 = TARGET_PAGE_SIZE - 1;                           \
2555        else {                                                          \
2556            end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2557            if (end_addr2 < TARGET_PAGE_SIZE - 1)                       \
2558                need_subpage = 1;                                       \
2559        }                                                               \
2560    } while (0)
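
    /* Worked example (editor's sketch, not part of the original source),
       assuming TARGET_PAGE_SIZE == 4096: registering start_addr=0x1000100
       with orig_size=0x80 gives, on the first iteration (addr == start_addr),
       start_addr2=0x100 and end_addr2=0x17f, both of which set need_subpage,
       so only bytes 0x100..0x17f of that page are routed to the new handler
       while the rest keeps its previous mapping.  Note that the macro reads
       orig_size from the enclosing scope instead of taking it as a
       parameter. */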
2561
2562/* register physical memory.
2563   For RAM, 'size' must be a multiple of the target page size.
2564   If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2565   io memory page.  The address used when calling the IO function is
2566   the offset from the start of the region, plus region_offset.  Both
2567   start_addr and region_offset are rounded down to a page boundary
2568   before calculating this offset.  This should not be a problem unless
2569   the low bits of start_addr and region_offset differ.  */
2570void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
2571                                         ram_addr_t size,
2572                                         ram_addr_t phys_offset,
2573                                         ram_addr_t region_offset)
2574{
2575    target_phys_addr_t addr, end_addr;
2576    PhysPageDesc *p;
2577    CPUState *env;
2578    ram_addr_t orig_size = size;
2579    subpage_t *subpage;
2580
2581    cpu_notify_set_memory(start_addr, size, phys_offset);
2582
2583    if (phys_offset == IO_MEM_UNASSIGNED) {
2584        region_offset = start_addr;
2585    }
2586    region_offset &= TARGET_PAGE_MASK;
2587    size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2588    end_addr = start_addr + (target_phys_addr_t)size;
2589    for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
2590        p = phys_page_find(addr >> TARGET_PAGE_BITS);
2591        if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2592            ram_addr_t orig_memory = p->phys_offset;
2593            target_phys_addr_t start_addr2, end_addr2;
2594            int need_subpage = 0;
2595
2596            CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2597                          need_subpage);
2598            if (need_subpage) {
2599                if (!(orig_memory & IO_MEM_SUBPAGE)) {
2600                    subpage = subpage_init((addr & TARGET_PAGE_MASK),
2601                                           &p->phys_offset, orig_memory,
2602                                           p->region_offset);
2603                } else {
2604                    subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2605                                            >> IO_MEM_SHIFT];
2606                }
2607                subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2608                                 region_offset);
2609                p->region_offset = 0;
2610            } else {
2611                p->phys_offset = phys_offset;
2612                if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2613                    (phys_offset & IO_MEM_ROMD))
2614                    phys_offset += TARGET_PAGE_SIZE;
2615            }
2616        } else {
2617            p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2618            p->phys_offset = phys_offset;
2619            p->region_offset = region_offset;
2620            if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2621                (phys_offset & IO_MEM_ROMD)) {
2622                phys_offset += TARGET_PAGE_SIZE;
2623            } else {
2624                target_phys_addr_t start_addr2, end_addr2;
2625                int need_subpage = 0;
2626
2627                CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2628                              end_addr2, need_subpage);
2629
2630                if (need_subpage) {
2631                    subpage = subpage_init((addr & TARGET_PAGE_MASK),
2632                                           &p->phys_offset, IO_MEM_UNASSIGNED,
2633                                           addr & TARGET_PAGE_MASK);
2634                    subpage_register(subpage, start_addr2, end_addr2,
2635                                     phys_offset, region_offset);
2636                    p->region_offset = 0;
2637                }
2638            }
2639        }
2640        region_offset += TARGET_PAGE_SIZE;
2641    }
2642
2643    /* since each CPU stores ram addresses in its TLB cache, we must
2644       reset the modified entries */
2645    /* XXX: slow ! */
2646    for(env = first_cpu; env != NULL; env = env->next_cpu) {
2647        tlb_flush(env, 1);
2648    }
2649}
2650
2651/* XXX: temporary until new memory mapping API */
2652ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2653{
2654    PhysPageDesc *p;
2655
2656    p = phys_page_find(addr >> TARGET_PAGE_BITS);
2657    if (!p)
2658        return IO_MEM_UNASSIGNED;
2659    return p->phys_offset;
2660}
2661
2662void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2663{
2664    if (kvm_enabled())
2665        kvm_coalesce_mmio_region(addr, size);
2666}
2667
2668void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2669{
2670    if (kvm_enabled())
2671        kvm_uncoalesce_mmio_region(addr, size);
2672}
2673
2674void qemu_flush_coalesced_mmio_buffer(void)
2675{
2676    if (kvm_enabled())
2677        kvm_flush_coalesced_mmio_buffer();
2678}
2679
2680#if defined(__linux__) && !defined(TARGET_S390X)
2681
2682#include <sys/vfs.h>
2683
2684#define HUGETLBFS_MAGIC       0x958458f6
2685
2686static long gethugepagesize(const char *path)
2687{
2688    struct statfs fs;
2689    int ret;
2690
2691    do {
2692        ret = statfs(path, &fs);
2693    } while (ret != 0 && errno == EINTR);
2694
2695    if (ret != 0) {
2696        perror(path);
2697        return 0;
2698    }
2699
2700    if (fs.f_type != HUGETLBFS_MAGIC)
2701        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2702
2703    return fs.f_bsize;
2704}
2705
2706static void *file_ram_alloc(RAMBlock *block,
2707                            ram_addr_t memory,
2708                            const char *path)
2709{
2710    char *filename;
2711    void *area;
2712    int fd;
2713#ifdef MAP_POPULATE
2714    int flags;
2715#endif
2716    unsigned long hpagesize;
2717
2718    hpagesize = gethugepagesize(path);
2719    if (!hpagesize) {
2720        return NULL;
2721    }
2722
2723    if (memory < hpagesize) {
2724        return NULL;
2725    }
2726
2727    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2728        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2729        return NULL;
2730    }
2731
2732    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2733        return NULL;
2734    }
2735
2736    fd = mkstemp(filename);
2737    if (fd < 0) {
2738        perror("unable to create backing store for hugepages");
2739        free(filename);
2740        return NULL;
2741    }
2742    unlink(filename);
2743    free(filename);
2744
2745    memory = (memory+hpagesize-1) & ~(hpagesize-1);
2746
2747    /*
2748     * ftruncate is not supported by hugetlbfs on older
2749     * hosts, so don't bother bailing out on errors.
2750     * If anything goes wrong with it under other filesystems,
2751     * mmap will fail.
2752     */
2753    if (ftruncate(fd, memory))
2754        perror("ftruncate");
2755
2756#ifdef MAP_POPULATE
2757    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2758     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2759     * to sidestep this quirk.
2760     */
2761    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2762    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2763#else
2764    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2765#endif
2766    if (area == MAP_FAILED) {
2767        perror("file_ram_alloc: can't mmap RAM pages");
2768        close(fd);
2769        return (NULL);
2770    }
2771    block->fd = fd;
2772    return area;
2773}
2774#endif
2775
2776static ram_addr_t find_ram_offset(ram_addr_t size)
2777{
2778    RAMBlock *block, *next_block;
2779    ram_addr_t offset = 0, mingap = ULONG_MAX;
2780
2781    if (QLIST_EMPTY(&ram_list.blocks))
2782        return 0;
2783
2784    QLIST_FOREACH(block, &ram_list.blocks, next) {
2785        ram_addr_t end, next = ULONG_MAX;
2786
2787        end = block->offset + block->length;
2788
2789        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2790            if (next_block->offset >= end) {
2791                next = MIN(next, next_block->offset);
2792            }
2793        }
2794        if (next - end >= size && next - end < mingap) {
2795            offset =  end;
2796            mingap = next - end;
2797        }
2798    }
2799    return offset;
2800}
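
    /* Illustrative sketch (editor's addition, not part of the original
       source): with existing blocks covering [0, 0x1000000) and
       [0x3000000, 0x4000000), a request for size=0x1000000 sees a
       0x2000000-byte gap after the first block and an effectively unbounded
       one after the second; the smallest gap that still fits wins, so the
       new block is placed at offset 0x1000000. */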
2801
2802static ram_addr_t last_ram_offset(void)
2803{
2804    RAMBlock *block;
2805    ram_addr_t last = 0;
2806
2807    QLIST_FOREACH(block, &ram_list.blocks, next)
2808        last = MAX(last, block->offset + block->length);
2809
2810    return last;
2811}
2812
2813ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2814                                   ram_addr_t size, void *host)
2815{
2816    RAMBlock *new_block, *block;
2817
2818    size = TARGET_PAGE_ALIGN(size);
2819    new_block = qemu_mallocz(sizeof(*new_block));
2820
2821    if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2822        char *id = dev->parent_bus->info->get_dev_path(dev);
2823        if (id) {
2824            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2825            qemu_free(id);
2826        }
2827    }
2828    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2829
2830    QLIST_FOREACH(block, &ram_list.blocks, next) {
2831        if (!strcmp(block->idstr, new_block->idstr)) {
2832            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2833                    new_block->idstr);
2834            abort();
2835        }
2836    }
2837
2838    if (host) {
2839        new_block->host = host;
2840    } else {
2841        if (mem_path) {
2842#if defined (__linux__) && !defined(TARGET_S390X)
2843            new_block->host = file_ram_alloc(new_block, size, mem_path);
2844            if (!new_block->host) {
2845                new_block->host = qemu_vmalloc(size);
2846                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2847            }
2848#else
2849            fprintf(stderr, "-mem-path option unsupported\n");
2850            exit(1);
2851#endif
2852        } else {
2853#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2854            /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
2855            new_block->host = mmap((void*)0x1000000, size,
2856                                   PROT_EXEC|PROT_READ|PROT_WRITE,
2857                                   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
2858#else
2859            new_block->host = qemu_vmalloc(size);
2860#endif
2861            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2862        }
2863    }
2864
2865    new_block->offset = find_ram_offset(size);
2866    new_block->length = size;
2867
2868    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2869
2870    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2871                                       last_ram_offset() >> TARGET_PAGE_BITS);
2872    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2873           0xff, size >> TARGET_PAGE_BITS);
2874
2875    if (kvm_enabled())
2876        kvm_setup_guest_memory(new_block->host, size);
2877
2878    return new_block->offset;
2879}
2880
2881ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2882{
2883    return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2884}
2885
2886void qemu_ram_free(ram_addr_t addr)
2887{
2888    RAMBlock *block;
2889
2890    QLIST_FOREACH(block, &ram_list.blocks, next) {
2891        if (addr == block->offset) {
2892            QLIST_REMOVE(block, next);
2893            if (mem_path) {
2894#if defined (__linux__) && !defined(TARGET_S390X)
2895                if (block->fd) {
2896                    munmap(block->host, block->length);
2897                    close(block->fd);
2898                } else {
2899                    qemu_vfree(block->host);
2900                }
2901#endif
2902            } else {
2903#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2904                munmap(block->host, block->length);
2905#else
2906                qemu_vfree(block->host);
2907#endif
2908            }
2909            qemu_free(block);
2910            return;
2911        }
2912    }
2913
2914}
2915
2916/* Return a host pointer to ram allocated with qemu_ram_alloc.
2917   With the exception of the softmmu code in this file, this should
2918   only be used for local memory (e.g. video ram) that the device owns,
2919   and knows it isn't going to access beyond the end of the block.
2920
2921   It should not be used for general purpose DMA.
2922   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2923 */
2924void *qemu_get_ram_ptr(ram_addr_t addr)
2925{
2926    RAMBlock *block;
2927
2928    QLIST_FOREACH(block, &ram_list.blocks, next) {
2929        if (addr - block->offset < block->length) {
2930            QLIST_REMOVE(block, next);
2931            QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2932            return block->host + (addr - block->offset);
2933        }
2934    }
2935
2936    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2937    abort();
2938
2939    return NULL;
2940}
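
    /* Illustrative usage (editor's sketch, not part of the original source;
       the names are made up): a device that owns its RAM block might do

           ram_addr_t vram_offset = qemu_ram_alloc(dev, "mydev.vram", vram_size);
           uint8_t *vram = qemu_get_ram_ptr(vram_offset);

       and then only touch the vram_size bytes it allocated.  Guest-visible
       DMA should instead go through cpu_physical_memory_map() or
       cpu_physical_memory_rw(), as noted above. */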
2941
2942/* Return a host pointer to ram allocated with qemu_ram_alloc.
2943 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2944 */
2945void *qemu_safe_ram_ptr(ram_addr_t addr)
2946{
2947    RAMBlock *block;
2948
2949    QLIST_FOREACH(block, &ram_list.blocks, next) {
2950        if (addr - block->offset < block->length) {
2951            return block->host + (addr - block->offset);
2952        }
2953    }
2954
2955    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2956    abort();
2957
2958    return NULL;
2959}
2960
2961int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2962{
2963    RAMBlock *block;
2964    uint8_t *host = ptr;
2965
2966    QLIST_FOREACH(block, &ram_list.blocks, next) {
2967        if (host - block->host < block->length) {
2968            *ram_addr = block->offset + (host - block->host);
2969            return 0;
2970        }
2971    }
2972    return -1;
2973}
2974
2975/* Some of the softmmu routines need to translate from a host pointer
2976   (typically a TLB entry) back to a ram offset.  */
2977ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2978{
2979    ram_addr_t ram_addr;
2980
2981    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2982        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2983        abort();
2984    }
2985    return ram_addr;
2986}
2987
2988static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
2989{
2990#ifdef DEBUG_UNASSIGNED
2991    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2992#endif
2993#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2994    do_unassigned_access(addr, 0, 0, 0, 1);
2995#endif
2996    return 0;
2997}
2998
2999static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3000{
3001#ifdef DEBUG_UNASSIGNED
3002    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3003#endif
3004#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3005    do_unassigned_access(addr, 0, 0, 0, 2);
3006#endif
3007    return 0;
3008}
3009
3010static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3011{
3012#ifdef DEBUG_UNASSIGNED
3013    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3014#endif
3015#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3016    do_unassigned_access(addr, 0, 0, 0, 4);
3017#endif
3018    return 0;
3019}
3020
3021static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3022{
3023#ifdef DEBUG_UNASSIGNED
3024    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3025#endif
3026#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3027    do_unassigned_access(addr, 1, 0, 0, 1);
3028#endif
3029}
3030
3031static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3032{
3033#ifdef DEBUG_UNASSIGNED
3034    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3035#endif
3036#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3037    do_unassigned_access(addr, 1, 0, 0, 2);
3038#endif
3039}
3040
3041static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3042{
3043#ifdef DEBUG_UNASSIGNED
3044    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3045#endif
3046#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3047    do_unassigned_access(addr, 1, 0, 0, 4);
3048#endif
3049}
3050
3051static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3052    unassigned_mem_readb,
3053    unassigned_mem_readw,
3054    unassigned_mem_readl,
3055};
3056
3057static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3058    unassigned_mem_writeb,
3059    unassigned_mem_writew,
3060    unassigned_mem_writel,
3061};
3062
3063static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3064                                uint32_t val)
3065{
3066    int dirty_flags;
3067    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3068    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3069#if !defined(CONFIG_USER_ONLY)
3070        tb_invalidate_phys_page_fast(ram_addr, 1);
3071        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3072#endif
3073    }
3074    stb_p(qemu_get_ram_ptr(ram_addr), val);
3075    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3076    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3077    /* we remove the notdirty callback only if the code has been
3078       flushed */
3079    if (dirty_flags == 0xff)
3080        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3081}
3082
3083static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3084                                uint32_t val)
3085{
3086    int dirty_flags;
3087    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3088    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3089#if !defined(CONFIG_USER_ONLY)
3090        tb_invalidate_phys_page_fast(ram_addr, 2);
3091        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3092#endif
3093    }
3094    stw_p(qemu_get_ram_ptr(ram_addr), val);
3095    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3096    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3097    /* we remove the notdirty callback only if the code has been
3098       flushed */
3099    if (dirty_flags == 0xff)
3100        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3101}
3102
3103static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3104                                uint32_t val)
3105{
3106    int dirty_flags;
3107    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3108    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3109#if !defined(CONFIG_USER_ONLY)
3110        tb_invalidate_phys_page_fast(ram_addr, 4);
3111        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3112#endif
3113    }
3114    stl_p(qemu_get_ram_ptr(ram_addr), val);
3115    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3116    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3117    /* we remove the notdirty callback only if the code has been
3118       flushed */
3119    if (dirty_flags == 0xff)
3120        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3121}
3122
3123static CPUReadMemoryFunc * const error_mem_read[3] = {
3124    NULL, /* never used */
3125    NULL, /* never used */
3126    NULL, /* never used */
3127};
3128
3129static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3130    notdirty_mem_writeb,
3131    notdirty_mem_writew,
3132    notdirty_mem_writel,
3133};
3134
3135/* Generate a debug exception if a watchpoint has been hit.  */
3136static void check_watchpoint(int offset, int len_mask, int flags)
3137{
3138    CPUState *env = cpu_single_env;
3139    target_ulong pc, cs_base;
3140    TranslationBlock *tb;
3141    target_ulong vaddr;
3142    CPUWatchpoint *wp;
3143    int cpu_flags;
3144
3145    if (env->watchpoint_hit) {
3146        /* We re-entered the check after replacing the TB. Now raise
3147         * the debug interrupt so that it will trigger after the
3148         * current instruction. */
3149        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3150        return;
3151    }
3152    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3153    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3154        if ((vaddr == (wp->vaddr & len_mask) ||
3155             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3156            wp->flags |= BP_WATCHPOINT_HIT;
3157            if (!env->watchpoint_hit) {
3158                env->watchpoint_hit = wp;
3159                tb = tb_find_pc(env->mem_io_pc);
3160                if (!tb) {
3161                    cpu_abort(env, "check_watchpoint: could not find TB for "
3162                              "pc=%p", (void *)env->mem_io_pc);
3163                }
3164                cpu_restore_state(tb, env, env->mem_io_pc, NULL);
3165                tb_phys_invalidate(tb, -1);
3166                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3167                    env->exception_index = EXCP_DEBUG;
3168                } else {
3169                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3170                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3171                }
3172                cpu_resume_from_signal(env, NULL);
3173            }
3174        } else {
3175            wp->flags &= ~BP_WATCHPOINT_HIT;
3176        }
3177    }
3178}
3179
3180/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
3181   so these check for a hit then pass through to the normal out-of-line
3182   phys routines.  */
3183static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3184{
3185    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3186    return ldub_phys(addr);
3187}
3188
3189static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3190{
3191    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3192    return lduw_phys(addr);
3193}
3194
3195static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3196{
3197    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3198    return ldl_phys(addr);
3199}
3200
3201static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3202                             uint32_t val)
3203{
3204    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3205    stb_phys(addr, val);
3206}
3207
3208static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3209                             uint32_t val)
3210{
3211    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3212    stw_phys(addr, val);
3213}
3214
3215static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3216                             uint32_t val)
3217{
3218    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3219    stl_phys(addr, val);
3220}
3221
3222static CPUReadMemoryFunc * const watch_mem_read[3] = {
3223    watch_mem_readb,
3224    watch_mem_readw,
3225    watch_mem_readl,
3226};
3227
3228static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3229    watch_mem_writeb,
3230    watch_mem_writew,
3231    watch_mem_writel,
3232};
3233
3234static inline uint32_t subpage_readlen (subpage_t *mmio,
3235                                        target_phys_addr_t addr,
3236                                        unsigned int len)
3237{
3238    unsigned int idx = SUBPAGE_IDX(addr);
3239#if defined(DEBUG_SUBPAGE)
3240    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3241           mmio, len, addr, idx);
3242#endif
3243
3244    addr += mmio->region_offset[idx];
3245    idx = mmio->sub_io_index[idx];
3246    return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3247}
3248
3249static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3250                                     uint32_t value, unsigned int len)
3251{
3252    unsigned int idx = SUBPAGE_IDX(addr);
3253#if defined(DEBUG_SUBPAGE)
3254    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3255           __func__, mmio, len, addr, idx, value);
3256#endif
3257
3258    addr += mmio->region_offset[idx];
3259    idx = mmio->sub_io_index[idx];
3260    io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3261}
3262
3263static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3264{
3265    return subpage_readlen(opaque, addr, 0);
3266}
3267
3268static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3269                            uint32_t value)
3270{
3271    subpage_writelen(opaque, addr, value, 0);
3272}
3273
3274static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3275{
3276    return subpage_readlen(opaque, addr, 1);
3277}
3278
3279static void subpage_writew (void *opaque, target_phys_addr_t addr,
3280                            uint32_t value)
3281{
3282    subpage_writelen(opaque, addr, value, 1);
3283}
3284
3285static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3286{
3287    return subpage_readlen(opaque, addr, 2);
3288}
3289
3290static void subpage_writel (void *opaque, target_phys_addr_t addr,
3291                            uint32_t value)
3292{
3293    subpage_writelen(opaque, addr, value, 2);
3294}
3295
3296static CPUReadMemoryFunc * const subpage_read[] = {
3297    &subpage_readb,
3298    &subpage_readw,
3299    &subpage_readl,
3300};
3301
3302static CPUWriteMemoryFunc * const subpage_write[] = {
3303    &subpage_writeb,
3304    &subpage_writew,
3305    &subpage_writel,
3306};
3307
3308static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3309                             ram_addr_t memory, ram_addr_t region_offset)
3310{
3311    int idx, eidx;
3312
3313    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3314        return -1;
3315    idx = SUBPAGE_IDX(start);
3316    eidx = SUBPAGE_IDX(end);
3317#if defined(DEBUG_SUBPAGE)
3318    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3319           mmio, start, end, idx, eidx, memory);
3320#endif
3321    if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3322        memory = IO_MEM_UNASSIGNED;
3323    memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3324    for (; idx <= eidx; idx++) {
3325        mmio->sub_io_index[idx] = memory;
3326        mmio->region_offset[idx] = region_offset;
3327    }
3328
3329    return 0;
3330}
3331
3332static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3333                                ram_addr_t orig_memory,
3334                                ram_addr_t region_offset)
3335{
3336    subpage_t *mmio;
3337    int subpage_memory;
3338
3339    mmio = qemu_mallocz(sizeof(subpage_t));
3340
3341    mmio->base = base;
3342    subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3343                                            DEVICE_NATIVE_ENDIAN);
3344#if defined(DEBUG_SUBPAGE)
3345    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3346           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3347#endif
3348    *phys = subpage_memory | IO_MEM_SUBPAGE;
3349    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3350
3351    return mmio;
3352}
3353
3354static int get_free_io_mem_idx(void)
3355{
3356    int i;
3357
3358    for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
3359        if (!io_mem_used[i]) {
3360            io_mem_used[i] = 1;
3361            return i;
3362        }
3363    fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3364    return -1;
3365}
3366
3367/*
3368 * Most devices operate in little-endian mode, but some operate in
3369 * big-endian.  A device's MMIO accesses are byte swapped when it is
3370 * plugged onto a CPU of the opposite endianness.
3371 *
3372 * CPU          Device           swap?
3373 *
3374 * little       little           no
3375 * little       big              yes
3376 * big          little           yes
3377 * big          big              no
3378 */
3379
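/*
 * Usage sketch (illustrative only, hence #if 0): what the table above
 * means for a 32-bit read.  Assume a hypothetical device whose readl
 * callback returns the raw register value 0x12345678 and which is
 * registered with DEVICE_BIG_ENDIAN on a little-endian target: the
 * swapendian wrapper installed below byte swaps the value, so the guest
 * observes 0x78563412.  my_be_dev_readl is a made-up name.
 */
#if 0
static uint32_t my_be_dev_readl(void *opaque, target_phys_addr_t addr)
{
    return 0x12345678;          /* value in the device's own endianness */
}

static void endian_swap_example(void)
{
    /* Equivalent to what swapendian_mem_readl() does for this device. */
    uint32_t guest_val = bswap32(my_be_dev_readl(NULL, 0));
    /* guest_val == 0x78563412 on a little-endian target. */
    (void)guest_val;
}
#endif
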
3380typedef struct SwapEndianContainer {
3381    CPUReadMemoryFunc *read[3];
3382    CPUWriteMemoryFunc *write[3];
3383    void *opaque;
3384} SwapEndianContainer;
3385
3386static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3387{
3388    uint32_t val;
3389    SwapEndianContainer *c = opaque;
3390    val = c->read[0](c->opaque, addr);
3391    return val;
3392}
3393
3394static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3395{
3396    uint32_t val;
3397    SwapEndianContainer *c = opaque;
3398    val = bswap16(c->read[1](c->opaque, addr));
3399    return val;
3400}
3401
3402static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3403{
3404    uint32_t val;
3405    SwapEndianContainer *c = opaque;
3406    val = bswap32(c->read[2](c->opaque, addr));
3407    return val;
3408}
3409
3410static CPUReadMemoryFunc * const swapendian_readfn[3]={
3411    swapendian_mem_readb,
3412    swapendian_mem_readw,
3413    swapendian_mem_readl
3414};
3415
3416static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3417                                  uint32_t val)
3418{
3419    SwapEndianContainer *c = opaque;
3420    c->write[0](c->opaque, addr, val);
3421}
3422
3423static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3424                                  uint32_t val)
3425{
3426    SwapEndianContainer *c = opaque;
3427    c->write[1](c->opaque, addr, bswap16(val));
3428}
3429
3430static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3431                                  uint32_t val)
3432{
3433    SwapEndianContainer *c = opaque;
3434    c->write[2](c->opaque, addr, bswap32(val));
3435}
3436
3437static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3438    swapendian_mem_writeb,
3439    swapendian_mem_writew,
3440    swapendian_mem_writel
3441};
3442
3443static void swapendian_init(int io_index)
3444{
3445    SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3446    int i;
3447
3448    /* Swap mmio for big endian targets */
3449    c->opaque = io_mem_opaque[io_index];
3450    for (i = 0; i < 3; i++) {
3451        c->read[i] = io_mem_read[io_index][i];
3452        c->write[i] = io_mem_write[io_index][i];
3453
3454        io_mem_read[io_index][i] = swapendian_readfn[i];
3455        io_mem_write[io_index][i] = swapendian_writefn[i];
3456    }
3457    io_mem_opaque[io_index] = c;
3458}
3459
3460static void swapendian_del(int io_index)
3461{
3462    if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3463        qemu_free(io_mem_opaque[io_index]);
3464    }
3465}
3466
3467/* mem_read and mem_write are arrays of access functions indexed by
3468   size: byte (index 0), word (index 1) and dword (index 2).
3469   Individual entries may be omitted by passing a NULL function pointer.
3470   If io_index is non-zero, the corresponding I/O zone is modified; if
3471   it is zero, a new I/O zone is allocated.  The return value can be
3472   used with cpu_register_physical_memory(); -1 is returned on
3473   error. */
3474static int cpu_register_io_memory_fixed(int io_index,
3475                                        CPUReadMemoryFunc * const *mem_read,
3476                                        CPUWriteMemoryFunc * const *mem_write,
3477                                        void *opaque, enum device_endian endian)
3478{
3479    int i;
3480
3481    if (io_index <= 0) {
3482        io_index = get_free_io_mem_idx();
3483        if (io_index == -1)
3484            return io_index;
3485    } else {
3486        io_index >>= IO_MEM_SHIFT;
3487        if (io_index >= IO_MEM_NB_ENTRIES)
3488            return -1;
3489    }
3490
3491    for (i = 0; i < 3; ++i) {
3492        io_mem_read[io_index][i]
3493            = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3494    }
3495    for (i = 0; i < 3; ++i) {
3496        io_mem_write[io_index][i]
3497            = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3498    }
3499    io_mem_opaque[io_index] = opaque;
3500
3501    switch (endian) {
3502    case DEVICE_BIG_ENDIAN:
3503#ifndef TARGET_WORDS_BIGENDIAN
3504        swapendian_init(io_index);
3505#endif
3506        break;
3507    case DEVICE_LITTLE_ENDIAN:
3508#ifdef TARGET_WORDS_BIGENDIAN
3509        swapendian_init(io_index);
3510#endif
3511        break;
3512    case DEVICE_NATIVE_ENDIAN:
3513    default:
3514        break;
3515    }
3516
3517    return (io_index << IO_MEM_SHIFT);
3518}
3519
3520int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3521                           CPUWriteMemoryFunc * const *mem_write,
3522                           void *opaque, enum device_endian endian)
3523{
3524    return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3525}
3526
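/*
 * Usage sketch (illustrative only, hence #if 0) for the registration API
 * described above cpu_register_io_memory_fixed().  The device callbacks,
 * state pointer, base address and region size are hypothetical; the
 * three-argument cpu_register_physical_memory(start, size, phys_offset)
 * helper is assumed to be the usual way to back a physical range with
 * the returned token.
 */
#if 0
static uint32_t my_dev_readl(void *opaque, target_phys_addr_t addr)
{
    return 0;                           /* hypothetical register read */
}

static void my_dev_writel(void *opaque, target_phys_addr_t addr,
                          uint32_t val)
{
    /* hypothetical register write */
}

/* Only the dword accessors are provided; the NULL byte/word slots fall
   back to unassigned_mem_read/write in cpu_register_io_memory_fixed(). */
static CPUReadMemoryFunc * const my_dev_read[3] = {
    NULL, NULL, my_dev_readl,
};
static CPUWriteMemoryFunc * const my_dev_write[3] = {
    NULL, NULL, my_dev_writel,
};

static void my_dev_map(void *state)
{
    int io = cpu_register_io_memory(my_dev_read, my_dev_write, state,
                                    DEVICE_LITTLE_ENDIAN);
    if (io == -1) {
        return;                         /* io_mem table is full */
    }
    /* The token is the io_index shifted by IO_MEM_SHIFT. */
    cpu_register_physical_memory(0x10000000, 0x1000, io);
}
#endif
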
3527void cpu_unregister_io_memory(int io_table_address)
3528{
3529    int i;
3530    int io_index = io_table_address >> IO_MEM_SHIFT;
3531
3532    swapendian_del(io_index);
3533
3534    for (i = 0; i < 3; i++) {
3535        io_mem_read[io_index][i] = unassigned_mem_read[i];
3536        io_mem_write[io_index][i] = unassigned_mem_write[i];
3537    }
3538    io_mem_opaque[io_index] = NULL;
3539    io_mem_used[io_index] = 0;
3540}
3541
3542static void io_mem_init(void)
3543{
3544    int i;
3545
3546    cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3547                                 unassigned_mem_write, NULL,
3548                                 DEVICE_NATIVE_ENDIAN);
3549    cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3550                                 unassigned_mem_write, NULL,
3551                                 DEVICE_NATIVE_ENDIAN);
3552    cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3553                                 notdirty_mem_write, NULL,
3554                                 DEVICE_NATIVE_ENDIAN);
3555    for (i=0; i<5; i++)
3556        io_mem_used[i] = 1;
3557
3558    io_mem_watch = cpu_register_io_memory(watch_mem_read,
3559                                          watch_mem_write, NULL,
3560                                          DEVICE_NATIVE_ENDIAN);
3561}
3562
3563#endif /* !defined(CONFIG_USER_ONLY) */
3564
3565/* physical memory access (slow version, mainly for debug) */
3566#if defined(CONFIG_USER_ONLY)
3567int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3568                        uint8_t *buf, int len, int is_write)
3569{
3570    int l, flags;
3571    target_ulong page;
3572    void * p;
3573
3574    while (len > 0) {
3575        page = addr & TARGET_PAGE_MASK;
3576        l = (page + TARGET_PAGE_SIZE) - addr;
3577        if (l > len)
3578            l = len;
3579        flags = page_get_flags(page);
3580        if (!(flags & PAGE_VALID))
3581            return -1;
3582        if (is_write) {
3583            if (!(flags & PAGE_WRITE))
3584                return -1;
3585            /* XXX: this code should not depend on lock_user */
3586            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3587                return -1;
3588            memcpy(p, buf, l);
3589            unlock_user(p, addr, l);
3590        } else {
3591            if (!(flags & PAGE_READ))
3592                return -1;
3593            /* XXX: this code should not depend on lock_user */
3594            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3595                return -1;
3596            memcpy(buf, p, l);
3597            unlock_user(p, addr, 0);
3598        }
3599        len -= l;
3600        buf += l;
3601        addr += l;
3602    }
3603    return 0;
3604}
3605
3606#else
3607void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3608                            int len, int is_write)
3609{
3610    int l, io_index;
3611    uint8_t *ptr;
3612    uint32_t val;
3613    target_phys_addr_t page;
3614    unsigned long pd;
3615    PhysPageDesc *p;
3616
3617    while (len > 0) {
3618        page = addr & TARGET_PAGE_MASK;
3619        l = (page + TARGET_PAGE_SIZE) - addr;
3620        if (l > len)
3621            l = len;
3622        p = phys_page_find(page >> TARGET_PAGE_BITS);
3623        if (!p) {
3624            pd = IO_MEM_UNASSIGNED;
3625        } else {
3626            pd = p->phys_offset;
3627        }
3628
3629        if (is_write) {
3630            if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3631                target_phys_addr_t addr1 = addr;
3632                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3633                if (p)
3634                    addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3635                /* XXX: could force cpu_single_env to NULL to avoid
3636                   potential bugs */
3637                if (l >= 4 && ((addr1 & 3) == 0)) {
3638                    /* 32 bit write access */
3639                    val = ldl_p(buf);
3640                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3641                    l = 4;
3642                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3643                    /* 16 bit write access */
3644                    val = lduw_p(buf);
3645                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3646                    l = 2;
3647                } else {
3648                    /* 8 bit write access */
3649                    val = ldub_p(buf);
3650                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3651                    l = 1;
3652                }
3653            } else {
3654                unsigned long addr1;
3655                addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3656                /* RAM case */
3657                ptr = qemu_get_ram_ptr(addr1);
3658                memcpy(ptr, buf, l);
3659                if (!cpu_physical_memory_is_dirty(addr1)) {
3660                    /* invalidate code */
3661                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3662                    /* set dirty bit */
3663                    cpu_physical_memory_set_dirty_flags(
3664                        addr1, (0xff & ~CODE_DIRTY_FLAG));
3665                }
3666            }
3667        } else {
3668            if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3669                !(pd & IO_MEM_ROMD)) {
3670                target_phys_addr_t addr1 = addr;
3671                /* I/O case */
3672                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3673                if (p)
3674                    addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3675                if (l >= 4 && ((addr1 & 3) == 0)) {
3676                    /* 32 bit read access */
3677                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3678                    stl_p(buf, val);
3679                    l = 4;
3680                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3681                    /* 16 bit read access */
3682                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3683                    stw_p(buf, val);
3684                    l = 2;
3685                } else {
3686                    /* 8 bit read access */
3687                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3688                    stb_p(buf, val);
3689                    l = 1;
3690                }
3691            } else {
3692                /* RAM case */
3693                ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3694                    (addr & ~TARGET_PAGE_MASK);
3695                memcpy(buf, ptr, l);
3696            }
3697        }
3698        len -= l;
3699        buf += l;
3700        addr += l;
3701    }
3702}
3703
3704/* used for ROM loading: can write to both RAM and ROM */
3705void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3706                                   const uint8_t *buf, int len)
3707{
3708    int l;
3709    uint8_t *ptr;
3710    target_phys_addr_t page;
3711    unsigned long pd;
3712    PhysPageDesc *p;
3713
3714    while (len > 0) {
3715        page = addr & TARGET_PAGE_MASK;
3716        l = (page + TARGET_PAGE_SIZE) - addr;
3717        if (l > len)
3718            l = len;
3719        p = phys_page_find(page >> TARGET_PAGE_BITS);
3720        if (!p) {
3721            pd = IO_MEM_UNASSIGNED;
3722        } else {
3723            pd = p->phys_offset;
3724        }
3725
3726        if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3727            (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3728            !(pd & IO_MEM_ROMD)) {
3729            /* do nothing */
3730        } else {
3731            unsigned long addr1;
3732            addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3733            /* ROM/RAM case */
3734            ptr = qemu_get_ram_ptr(addr1);
3735            memcpy(ptr, buf, l);
3736        }
3737        len -= l;
3738        buf += l;
3739        addr += l;
3740    }
3741}
3742
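/*
 * Usage sketch (illustrative only, hence #if 0): copying a firmware image
 * into a ROM-backed physical range at board setup time.  A plain
 * cpu_physical_memory_rw() write to a ROM region is discarded (ROM is
 * registered with unassigned_mem_write), whereas
 * cpu_physical_memory_write_rom() copies into the backing RAM.  The
 * address and buffer are hypothetical.
 */
#if 0
static void load_firmware_example(const uint8_t *blob, int blob_size)
{
    cpu_physical_memory_write_rom(0xfffc0000, blob, blob_size);
}
#endif
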
3743typedef struct {
3744    void *buffer;
3745    target_phys_addr_t addr;
3746    target_phys_addr_t len;
3747} BounceBuffer;
3748
3749static BounceBuffer bounce;
3750
3751typedef struct MapClient {
3752    void *opaque;
3753    void (*callback)(void *opaque);
3754    QLIST_ENTRY(MapClient) link;
3755} MapClient;
3756
3757static QLIST_HEAD(map_client_list, MapClient) map_client_list
3758    = QLIST_HEAD_INITIALIZER(map_client_list);
3759
3760void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3761{
3762    MapClient *client = qemu_malloc(sizeof(*client));
3763
3764    client->opaque = opaque;
3765    client->callback = callback;
3766    QLIST_INSERT_HEAD(&map_client_list, client, link);
3767    return client;
3768}
3769
3770void cpu_unregister_map_client(void *_client)
3771{
3772    MapClient *client = (MapClient *)_client;
3773
3774    QLIST_REMOVE(client, link);
3775    qemu_free(client);
3776}
3777
3778static void cpu_notify_map_clients(void)
3779{
3780    MapClient *client;
3781
3782    while (!QLIST_EMPTY(&map_client_list)) {
3783        client = QLIST_FIRST(&map_client_list);
3784        client->callback(client->opaque);
3785        cpu_unregister_map_client(client);
3786    }
3787}
3788
3789/* Map a physical memory region into a host virtual address.
3790 * May map a subset of the requested range, given by and returned in *plen.
3791 * May return NULL if resources needed to perform the mapping are exhausted.
3792 * Use only for reads OR writes - not for read-modify-write operations.
3793 * Use cpu_register_map_client() to know when retrying the map operation is
3794 * likely to succeed.
3795 */
3796void *cpu_physical_memory_map(target_phys_addr_t addr,
3797                              target_phys_addr_t *plen,
3798                              int is_write)
3799{
3800    target_phys_addr_t len = *plen;
3801    target_phys_addr_t done = 0;
3802    int l;
3803    uint8_t *ret = NULL;
3804    uint8_t *ptr;
3805    target_phys_addr_t page;
3806    unsigned long pd;
3807    PhysPageDesc *p;
3808    unsigned long addr1;
3809
3810    while (len > 0) {
3811        page = addr & TARGET_PAGE_MASK;
3812        l = (page + TARGET_PAGE_SIZE) - addr;
3813        if (l > len)
3814            l = len;
3815        p = phys_page_find(page >> TARGET_PAGE_BITS);
3816        if (!p) {
3817            pd = IO_MEM_UNASSIGNED;
3818        } else {
3819            pd = p->phys_offset;
3820        }
3821
3822        if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3823            if (done || bounce.buffer) {
3824                break;
3825            }
3826            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3827            bounce.addr = addr;
3828            bounce.len = l;
3829            if (!is_write) {
3830                cpu_physical_memory_rw(addr, bounce.buffer, l, 0);
3831            }
3832            ptr = bounce.buffer;
3833        } else {
3834            addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3835            ptr = qemu_get_ram_ptr(addr1);
3836        }
3837        if (!done) {
3838            ret = ptr;
3839        } else if (ret + done != ptr) {
3840            break;
3841        }
3842
3843        len -= l;
3844        addr += l;
3845        done += l;
3846    }
3847    *plen = done;
3848    return ret;
3849}
3850
3851/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3852 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3853 * the amount of memory that was actually read or written by the caller.
3854 */
3855void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3856                               int is_write, target_phys_addr_t access_len)
3857{
3858    if (buffer != bounce.buffer) {
3859        if (is_write) {
3860            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3861            while (access_len) {
3862                unsigned l;
3863                l = TARGET_PAGE_SIZE;
3864                if (l > access_len)
3865                    l = access_len;
3866                if (!cpu_physical_memory_is_dirty(addr1)) {
3867                    /* invalidate code */
3868                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3869                    /* set dirty bit */
3870                    cpu_physical_memory_set_dirty_flags(
3871                        addr1, (0xff & ~CODE_DIRTY_FLAG));
3872                }
3873                addr1 += l;
3874                access_len -= l;
3875            }
3876        }
3877        return;
3878    }
3879    if (is_write) {
3880        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3881    }
3882    qemu_vfree(bounce.buffer);
3883    bounce.buffer = NULL;
3884    cpu_notify_map_clients();
3885}
3886
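/*
 * Usage sketch (illustrative only, hence #if 0): the typical
 * map/modify/unmap pattern for DMA-style access, including the retry
 * path via cpu_register_map_client() when the mapping cannot be
 * established (e.g. the single bounce buffer is already in use).  The
 * callback and the zero-fill payload are hypothetical.
 */
#if 0
static void my_retry_cb(void *opaque)
{
    /* retry the transfer now that a mapping is likely to succeed */
}

static void dma_zero_example(target_phys_addr_t addr, target_phys_addr_t len)
{
    target_phys_addr_t plen = len;
    void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (!host) {
        cpu_register_map_client(NULL, my_retry_cb);
        return;
    }
    /* plen may be smaller than the requested len */
    memset(host, 0, plen);
    cpu_physical_memory_unmap(host, plen, 1, plen);
}
#endif
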
3887/* warning: addr must be aligned */
3888uint32_t ldl_phys(target_phys_addr_t addr)
3889{
3890    int io_index;
3891    uint8_t *ptr;
3892    uint32_t val;
3893    unsigned long pd;
3894    PhysPageDesc *p;
3895
3896    p = phys_page_find(addr >> TARGET_PAGE_BITS);
3897    if (!p) {
3898        pd = IO_MEM_UNASSIGNED;
3899    } else {
3900        pd = p->phys_offset;
3901    }
3902
3903    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3904        !(pd & IO_MEM_ROMD)) {
3905        /* I/O case */
3906        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3907        if (p)
3908            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3909        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3910    } else {
3911        /* RAM case */
3912        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3913            (addr & ~TARGET_PAGE_MASK);
3914        val = ldl_p(ptr);
3915    }
3916    return val;
3917}
3918
3919/* warning: addr must be aligned */
3920uint64_t ldq_phys(target_phys_addr_t addr)
3921{
3922    int io_index;
3923    uint8_t *ptr;
3924    uint64_t val;
3925    unsigned long pd;
3926    PhysPageDesc *p;
3927
3928    p = phys_page_find(addr >> TARGET_PAGE_BITS);
3929    if (!p) {
3930        pd = IO_MEM_UNASSIGNED;
3931    } else {
3932        pd = p->phys_offset;
3933    }
3934
3935    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3936        !(pd & IO_MEM_ROMD)) {
3937        /* I/O case */
3938        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3939        if (p)
3940            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3941#ifdef TARGET_WORDS_BIGENDIAN
3942        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
3943        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
3944#else
3945        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3946        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
3947#endif
3948    } else {
3949        /* RAM case */
3950        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3951            (addr & ~TARGET_PAGE_MASK);
3952        val = ldq_p(ptr);
3953    }
3954    return val;
3955}
3956
3957/* XXX: optimize */
3958uint32_t ldub_phys(target_phys_addr_t addr)
3959{
3960    uint8_t val;
3961    cpu_physical_memory_read(addr, &val, 1);
3962    return val;
3963}
3964
3965/* warning: addr must be aligned */
3966uint32_t lduw_phys(target_phys_addr_t addr)
3967{
3968    int io_index;
3969    uint8_t *ptr;
3970    uint64_t val;
3971    unsigned long pd;
3972    PhysPageDesc *p;
3973
3974    p = phys_page_find(addr >> TARGET_PAGE_BITS);
3975    if (!p) {
3976        pd = IO_MEM_UNASSIGNED;
3977    } else {
3978        pd = p->phys_offset;
3979    }
3980
3981    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3982        !(pd & IO_MEM_ROMD)) {
3983        /* I/O case */
3984        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3985        if (p)
3986            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3987        val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
3988    } else {
3989        /* RAM case */
3990        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3991            (addr & ~TARGET_PAGE_MASK);
3992        val = lduw_p(ptr);
3993    }
3994    return val;
3995}
3996
3997/* warning: addr must be aligned.  The RAM page is not marked as dirty
3998   and the code inside is not invalidated.  This is useful when the
3999   dirty bits are used to track modified PTEs. */
4000void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4001{
4002    int io_index;
4003    uint8_t *ptr;
4004    unsigned long pd;
4005    PhysPageDesc *p;
4006
4007    p = phys_page_find(addr >> TARGET_PAGE_BITS);
4008    if (!p) {
4009        pd = IO_MEM_UNASSIGNED;
4010    } else {
4011        pd = p->phys_offset;
4012    }
4013
4014    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4015        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4016        if (p)
4017            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4018        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4019    } else {
4020        unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4021        ptr = qemu_get_ram_ptr(addr1);
4022        stl_p(ptr, val);
4023
4024        if (unlikely(in_migration)) {
4025            if (!cpu_physical_memory_is_dirty(addr1)) {
4026                /* invalidate code */
4027                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4028                /* set dirty bit */
4029                cpu_physical_memory_set_dirty_flags(
4030                    addr1, (0xff & ~CODE_DIRTY_FLAG));
4031            }
4032        }
4033    }
4034}
4035
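/*
 * Usage sketch (illustrative only, hence #if 0): a software page-table
 * walker updating an "accessed" bit in a guest PTE.  stl_phys_notdirty()
 * keeps this internal bookkeeping write from marking the page dirty or
 * invalidating translated code.  The PTE layout (bit 5 as the accessed
 * bit) is hypothetical.
 */
#if 0
static void set_pte_accessed_example(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);
    stl_phys_notdirty(pte_addr, pte | (1u << 5));
}
#endif
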
4036void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4037{
4038    int io_index;
4039    uint8_t *ptr;
4040    unsigned long pd;
4041    PhysPageDesc *p;
4042
4043    p = phys_page_find(addr >> TARGET_PAGE_BITS);
4044    if (!p) {
4045        pd = IO_MEM_UNASSIGNED;
4046    } else {
4047        pd = p->phys_offset;
4048    }
4049
4050    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4051        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4052        if (p)
4053            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4054#ifdef TARGET_WORDS_BIGENDIAN
4055        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4056        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4057#else
4058        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4059        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4060#endif
4061    } else {
4062        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4063            (addr & ~TARGET_PAGE_MASK);
4064        stq_p(ptr, val);
4065    }
4066}
4067
4068/* warning: addr must be aligned */
4069void stl_phys(target_phys_addr_t addr, uint32_t val)
4070{
4071    int io_index;
4072    uint8_t *ptr;
4073    unsigned long pd;
4074    PhysPageDesc *p;
4075
4076    p = phys_page_find(addr >> TARGET_PAGE_BITS);
4077    if (!p) {
4078        pd = IO_MEM_UNASSIGNED;
4079    } else {
4080        pd = p->phys_offset;
4081    }
4082
4083    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4084        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4085        if (p)
4086            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4087        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4088    } else {
4089        unsigned long addr1;
4090        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4091        /* RAM case */
4092        ptr = qemu_get_ram_ptr(addr1);
4093        stl_p(ptr, val);
4094        if (!cpu_physical_memory_is_dirty(addr1)) {
4095            /* invalidate code */
4096            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4097            /* set dirty bit */
4098            cpu_physical_memory_set_dirty_flags(addr1,
4099                (0xff & ~CODE_DIRTY_FLAG));
4100        }
4101    }
4102}
4103
4104/* XXX: optimize */
4105void stb_phys(target_phys_addr_t addr, uint32_t val)
4106{
4107    uint8_t v = val;
4108    cpu_physical_memory_write(addr, &v, 1);
4109}
4110
4111/* warning: addr must be aligned */
4112void stw_phys(target_phys_addr_t addr, uint32_t val)
4113{
4114    int io_index;
4115    uint8_t *ptr;
4116    unsigned long pd;
4117    PhysPageDesc *p;
4118
4119    p = phys_page_find(addr >> TARGET_PAGE_BITS);
4120    if (!p) {
4121        pd = IO_MEM_UNASSIGNED;
4122    } else {
4123        pd = p->phys_offset;
4124    }
4125
4126    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4127        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4128        if (p)
4129            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4130        io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4131    } else {
4132        unsigned long addr1;
4133        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4134        /* RAM case */
4135        ptr = qemu_get_ram_ptr(addr1);
4136        stw_p(ptr, val);
4137        if (!cpu_physical_memory_is_dirty(addr1)) {
4138            /* invalidate code */
4139            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4140            /* set dirty bit */
4141            cpu_physical_memory_set_dirty_flags(addr1,
4142                (0xff & ~CODE_DIRTY_FLAG));
4143        }
4144    }
4145}
4146
4147/* XXX: optimize */
4148void stq_phys(target_phys_addr_t addr, uint64_t val)
4149{
4150    val = tswap64(val);
4151    cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
4152}
4153
4154/* virtual memory access for debug (includes writing to ROM) */
4155int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4156                        uint8_t *buf, int len, int is_write)
4157{
4158    int l;
4159    target_phys_addr_t phys_addr;
4160    target_ulong page;
4161
4162    while (len > 0) {
4163        page = addr & TARGET_PAGE_MASK;
4164        phys_addr = cpu_get_phys_page_debug(env, page);
4165        /* if no physical page mapped, return an error */
4166        if (phys_addr == -1)
4167            return -1;
4168        l = (page + TARGET_PAGE_SIZE) - addr;
4169        if (l > len)
4170            l = len;
4171        phys_addr += (addr & ~TARGET_PAGE_MASK);
4172        if (is_write)
4173            cpu_physical_memory_write_rom(phys_addr, buf, l);
4174        else
4175            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4176        len -= l;
4177        buf += l;
4178        addr += l;
4179    }
4180    return 0;
4181}
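
/*
 * Usage sketch (illustrative only, hence #if 0): reading guest memory
 * through a virtual address of a given CPU, as a debugger front end
 * (gdb stub or monitor) would.  The buffer size is arbitrary.
 */
#if 0
static void debug_peek_example(CPUState *env, target_ulong va)
{
    uint8_t buf[16];

    if (cpu_memory_rw_debug(env, va, buf, sizeof(buf), 0) < 0) {
        /* no physical page is mapped at this virtual address */
    }
}
#endif
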
4182#endif
4183
4184/* in deterministic execution mode, instructions doing device I/Os
4185   must be at the end of the TB */
4186void cpu_io_recompile(CPUState *env, void *retaddr)
4187{
4188    TranslationBlock *tb;
4189    uint32_t n, cflags;
4190    target_ulong pc, cs_base;
4191    uint64_t flags;
4192
4193    tb = tb_find_pc((unsigned long)retaddr);
4194    if (!tb) {
4195        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
4196                  retaddr);
4197    }
4198    n = env->icount_decr.u16.low + tb->icount;
4199    cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
4200    /* Calculate how many instructions had been executed before the fault
4201       occurred.  */
4202    n = n - env->icount_decr.u16.low;
4203    /* Generate a new TB ending on the I/O insn.  */
4204    n++;
4205    /* On MIPS and SH, delay slot instructions can only be restarted if
4206       they were already the first instruction in the TB.  If this is not
4207       the first instruction in a TB then re-execute the preceding
4208       branch.  */
4209#if defined(TARGET_MIPS)
4210    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4211        env->active_tc.PC -= 4;
4212        env->icount_decr.u16.low++;
4213        env->hflags &= ~MIPS_HFLAG_BMASK;
4214    }
4215#elif defined(TARGET_SH4)
4216    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4217            && n > 1) {
4218        env->pc -= 2;
4219        env->icount_decr.u16.low++;
4220        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4221    }
4222#endif
4223    /* This should never happen.  */
4224    if (n > CF_COUNT_MASK)
4225        cpu_abort(env, "TB too big during recompile");
4226
4227    cflags = n | CF_LAST_IO;
4228    pc = tb->pc;
4229    cs_base = tb->cs_base;
4230    flags = tb->flags;
4231    tb_phys_invalidate(tb, -1);
4232    /* FIXME: In theory this could raise an exception.  In practice
4233       we have already translated the block once so it's probably ok.  */
4234    tb_gen_code(env, pc, cs_base, flags, cflags);
4235    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4236       the first in the TB) then we end up generating a whole new TB and
4237       repeating the fault, which is horribly inefficient.
4238       Better would be to execute just this insn uncached, or generate a
4239       second new TB.  */
4240    cpu_resume_from_signal(env, NULL);
4241}
4242
4243#if !defined(CONFIG_USER_ONLY)
4244
4245void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4246{
4247    int i, target_code_size, max_target_code_size;
4248    int direct_jmp_count, direct_jmp2_count, cross_page;
4249    TranslationBlock *tb;
4250
4251    target_code_size = 0;
4252    max_target_code_size = 0;
4253    cross_page = 0;
4254    direct_jmp_count = 0;
4255    direct_jmp2_count = 0;
4256    for(i = 0; i < nb_tbs; i++) {
4257        tb = &tbs[i];
4258        target_code_size += tb->size;
4259        if (tb->size > max_target_code_size)
4260            max_target_code_size = tb->size;
4261        if (tb->page_addr[1] != -1)
4262            cross_page++;
4263        if (tb->tb_next_offset[0] != 0xffff) {
4264            direct_jmp_count++;
4265            if (tb->tb_next_offset[1] != 0xffff) {
4266                direct_jmp2_count++;
4267            }
4268        }
4269    }
4270    /* XXX: avoid using doubles ? */
4271    cpu_fprintf(f, "Translation buffer state:\n");
4272    cpu_fprintf(f, "gen code size       %td/%ld\n",
4273                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4274    cpu_fprintf(f, "TB count            %d/%d\n", 
4275                nb_tbs, code_gen_max_blocks);
4276    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
4277                nb_tbs ? target_code_size / nb_tbs : 0,
4278                max_target_code_size);
4279    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
4280                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4281                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4282    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4283            cross_page,
4284            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4285    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
4286                direct_jmp_count,
4287                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4288                direct_jmp2_count,
4289                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4290    cpu_fprintf(f, "\nStatistics:\n");
4291    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
4292    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4293    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
4294    tcg_dump_info(f, cpu_fprintf);
4295}
4296
4297#define MMUSUFFIX _cmmu
4298#define GETPC() NULL
4299#define env cpu_single_env
4300#define SOFTMMU_CODE_ACCESS
4301
4302#define SHIFT 0
4303#include "softmmu_template.h"
4304
4305#define SHIFT 1
4306#include "softmmu_template.h"
4307
4308#define SHIFT 2
4309#include "softmmu_template.h"
4310
4311#define SHIFT 3
4312#include "softmmu_template.h"
4313
4314#undef env
4315
4316#endif
4317