qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifdef _WIN32
  21#include <windows.h>
  22#else
  23#include <sys/types.h>
  24#include <sys/mman.h>
  25#endif
  26
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "tcg.h"
  30#include "hw/hw.h"
  31#include "hw/qdev.h"
  32#include "qemu/osdep.h"
  33#include "sysemu/kvm.h"
  34#include "hw/xen.h"
  35#include "qemu/timer.h"
  36#include "qemu/config-file.h"
  37#include "exec/memory.h"
  38#include "sysemu/dma.h"
  39#include "exec/address-spaces.h"
  40#if defined(CONFIG_USER_ONLY)
  41#include <qemu.h>
  42#else /* !CONFIG_USER_ONLY */
  43#include "sysemu/xen-mapcache.h"
  44#include "trace.h"
  45#endif
  46#include "exec/cpu-all.h"
  47
  48#include "exec/cputlb.h"
  49#include "translate-all.h"
  50
  51#include "exec/memory-internal.h"
  52
  53//#define DEBUG_UNASSIGNED
  54//#define DEBUG_SUBPAGE
  55
  56#if !defined(CONFIG_USER_ONLY)
  57int phys_ram_fd;
  58static int in_migration;
  59
  60RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
  61
  62static MemoryRegion *system_memory;
  63static MemoryRegion *system_io;
  64
  65AddressSpace address_space_io;
  66AddressSpace address_space_memory;
  67DMAContext dma_context_memory;
  68
  69MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
  70static MemoryRegion io_mem_subpage_ram;
  71
  72#endif
  73
  74CPUArchState *first_cpu;
  75/* current CPU in the current thread. It is only valid inside
  76   cpu_exec() */
  77DEFINE_TLS(CPUArchState *,cpu_single_env);
  78/* 0 = Do not count executed instructions.
  79   1 = Precise instruction counting.
  80   2 = Adaptive rate instruction counting.  */
  81int use_icount;
  82
  83#if !defined(CONFIG_USER_ONLY)
  84
  85static MemoryRegionSection *phys_sections;
  86static unsigned phys_sections_nb, phys_sections_nb_alloc;
  87static uint16_t phys_section_unassigned;
  88static uint16_t phys_section_notdirty;
  89static uint16_t phys_section_rom;
  90static uint16_t phys_section_watch;
  91
  92/* Simple allocator for PhysPageEntry nodes */
  93static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
  94static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
  95
  96#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
  97
  98static void io_mem_init(void);
  99static void memory_map_init(void);
 100static void *qemu_safe_ram_ptr(ram_addr_t addr);
 101
 102static MemoryRegion io_mem_watch;
 103#endif
 104
 105#if !defined(CONFIG_USER_ONLY)
 106
 107static void phys_map_node_reserve(unsigned nodes)
 108{
 109    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
 110        typedef PhysPageEntry Node[L2_SIZE];
 111        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
 112        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
 113                                      phys_map_nodes_nb + nodes);
 114        phys_map_nodes = g_renew(Node, phys_map_nodes,
 115                                 phys_map_nodes_nb_alloc);
 116    }
 117}
 118
 119static uint16_t phys_map_node_alloc(void)
 120{
 121    unsigned i;
 122    uint16_t ret;
 123
 124    ret = phys_map_nodes_nb++;
 125    assert(ret != PHYS_MAP_NODE_NIL);
 126    assert(ret != phys_map_nodes_nb_alloc);
 127    for (i = 0; i < L2_SIZE; ++i) {
 128        phys_map_nodes[ret][i].is_leaf = 0;
 129        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 130    }
 131    return ret;
 132}
 133
 134static void phys_map_nodes_reset(void)
 135{
 136    phys_map_nodes_nb = 0;
 137}
 138
 139
 140static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
 141                                hwaddr *nb, uint16_t leaf,
 142                                int level)
 143{
 144    PhysPageEntry *p;
 145    int i;
 146    hwaddr step = (hwaddr)1 << (level * L2_BITS);
 147
 148    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
 149        lp->ptr = phys_map_node_alloc();
 150        p = phys_map_nodes[lp->ptr];
 151        if (level == 0) {
 152            for (i = 0; i < L2_SIZE; i++) {
 153                p[i].is_leaf = 1;
 154                p[i].ptr = phys_section_unassigned;
 155            }
 156        }
 157    } else {
 158        p = phys_map_nodes[lp->ptr];
 159    }
 160    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 161
 162    while (*nb && lp < &p[L2_SIZE]) {
 163        if ((*index & (step - 1)) == 0 && *nb >= step) {
 164            lp->is_leaf = true;
 165            lp->ptr = leaf;
 166            *index += step;
 167            *nb -= step;
 168        } else {
 169            phys_page_set_level(lp, index, nb, leaf, level - 1);
 170        }
 171        ++lp;
 172    }
 173}
 174
 175static void phys_page_set(AddressSpaceDispatch *d,
 176                          hwaddr index, hwaddr nb,
 177                          uint16_t leaf)
 178{
 179    /* Wildly overreserve - it doesn't matter much. */
 180    phys_map_node_reserve(3 * P_L2_LEVELS);
 181
 182    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 183}
 184
 185MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
 186{
 187    PhysPageEntry lp = d->phys_map;
 188    PhysPageEntry *p;
 189    int i;
 190    uint16_t s_index = phys_section_unassigned;
 191
 192    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
 193        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 194            goto not_found;
 195        }
 196        p = phys_map_nodes[lp.ptr];
 197        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
 198    }
 199
 200    s_index = lp.ptr;
 201not_found:
 202    return &phys_sections[s_index];
 203}
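
/*
 * The structure above is a simple radix tree: each PhysPageEntry either
 * points at another L2_SIZE-wide node (is_leaf == 0) or holds an index into
 * phys_sections[] (is_leaf == 1).  phys_page_set_level() walks down from the
 * top level, allocating nodes lazily and recording whole aligned runs of
 * pages as a single leaf at the highest level whose span they cover, while
 * phys_page_find() consumes L2_BITS of the page index per level and falls
 * back to phys_section_unassigned as soon as it meets PHYS_MAP_NODE_NIL.
 */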
 204
 205bool memory_region_is_unassigned(MemoryRegion *mr)
 206{
 207    return mr != &io_mem_ram && mr != &io_mem_rom
 208        && mr != &io_mem_notdirty && !mr->rom_device
 209        && mr != &io_mem_watch;
 210}
 211#endif
 212
 213void cpu_exec_init_all(void)
 214{
 215#if !defined(CONFIG_USER_ONLY)
 216    qemu_mutex_init(&ram_list.mutex);
 217    memory_map_init();
 218    io_mem_init();
 219#endif
 220}
 221
 222#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 223
 224static int cpu_common_post_load(void *opaque, int version_id)
 225{
 226    CPUArchState *env = opaque;
 227
 228    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 229       version_id is increased. */
 230    env->interrupt_request &= ~0x01;
 231    tlb_flush(env, 1);
 232
 233    return 0;
 234}
 235
 236static const VMStateDescription vmstate_cpu_common = {
 237    .name = "cpu_common",
 238    .version_id = 1,
 239    .minimum_version_id = 1,
 240    .minimum_version_id_old = 1,
 241    .post_load = cpu_common_post_load,
 242    .fields      = (VMStateField []) {
 243        VMSTATE_UINT32(halted, CPUArchState),
 244        VMSTATE_UINT32(interrupt_request, CPUArchState),
 245        VMSTATE_END_OF_LIST()
 246    }
 247};
 248#endif
 249
 250CPUState *qemu_get_cpu(int index)
 251{
 252    CPUArchState *env = first_cpu;
 253    CPUState *cpu = NULL;
 254
 255    while (env) {
 256        cpu = ENV_GET_CPU(env);
 257        if (cpu->cpu_index == index) {
 258            break;
 259        }
 260        env = env->next_cpu;
 261    }
 262
 263    return cpu;
 264}
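
/*
 * qemu_get_cpu() is a linear scan over the first_cpu list, so the cost grows
 * with the number of CPUs.  A minimal usage sketch (hypothetical caller, not
 * from this file):
 *
 *     CPUState *cpu = qemu_get_cpu(0);
 *     if (cpu) {
 *         ... poke the boot CPU ...
 *     }
 *
 * Note that if no CPU has the requested index, the function returns the
 * CPUState of the last list entry it visited rather than NULL; NULL is only
 * returned when the CPU list is empty.
 */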
 265
 266void cpu_exec_init(CPUArchState *env)
 267{
 268    CPUState *cpu = ENV_GET_CPU(env);
 269    CPUArchState **penv;
 270    int cpu_index;
 271
 272#if defined(CONFIG_USER_ONLY)
 273    cpu_list_lock();
 274#endif
 275    env->next_cpu = NULL;
 276    penv = &first_cpu;
 277    cpu_index = 0;
 278    while (*penv != NULL) {
 279        penv = &(*penv)->next_cpu;
 280        cpu_index++;
 281    }
 282    cpu->cpu_index = cpu_index;
 283    cpu->numa_node = 0;
 284    QTAILQ_INIT(&env->breakpoints);
 285    QTAILQ_INIT(&env->watchpoints);
 286#ifndef CONFIG_USER_ONLY
 287    cpu->thread_id = qemu_get_thread_id();
 288#endif
 289    *penv = env;
 290#if defined(CONFIG_USER_ONLY)
 291    cpu_list_unlock();
 292#endif
 293#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 294    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
 295    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 296                    cpu_save, cpu_load, env);
 297#endif
 298}
 299
 300#if defined(TARGET_HAS_ICE)
 301#if defined(CONFIG_USER_ONLY)
 302static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
 303{
 304    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 305}
 306#else
 307static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
 308{
 309    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
 310            (pc & ~TARGET_PAGE_MASK));
 311}
 312#endif
 313#endif /* TARGET_HAS_ICE */
 314
 315#if defined(CONFIG_USER_ONLY)
 316void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
 317
 318{
 319}
 320
 321int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
 322                          int flags, CPUWatchpoint **watchpoint)
 323{
 324    return -ENOSYS;
 325}
 326#else
 327/* Add a watchpoint.  */
 328int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
 329                          int flags, CPUWatchpoint **watchpoint)
 330{
 331    target_ulong len_mask = ~(len - 1);
 332    CPUWatchpoint *wp;
 333
 334    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
 335    if ((len & (len - 1)) || (addr & ~len_mask) ||
 336            len == 0 || len > TARGET_PAGE_SIZE) {
 337        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
 338                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
 339        return -EINVAL;
 340    }
 341    wp = g_malloc(sizeof(*wp));
 342
 343    wp->vaddr = addr;
 344    wp->len_mask = len_mask;
 345    wp->flags = flags;
 346
 347    /* keep all GDB-injected watchpoints in front */
 348    if (flags & BP_GDB)
 349        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
 350    else
 351        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
 352
 353    tlb_flush_page(env, addr);
 354
 355    if (watchpoint)
 356        *watchpoint = wp;
 357    return 0;
 358}
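
/*
 * Illustrative use of the watchpoint API (hypothetical address and length,
 * not taken from this file): watch a 4-byte, naturally aligned range for
 * writes and drop it again by reference.
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(env, 0x1000, 4, BP_MEM_WRITE, &wp) == 0) {
 *         ...
 *         cpu_watchpoint_remove_by_ref(env, wp);
 *     }
 *
 * The length must be a power of two no larger than TARGET_PAGE_SIZE and the
 * address must be aligned to it, otherwise -EINVAL is returned.
 */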
 359
 360/* Remove a specific watchpoint.  */
 361int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
 362                          int flags)
 363{
 364    target_ulong len_mask = ~(len - 1);
 365    CPUWatchpoint *wp;
 366
 367    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
 368        if (addr == wp->vaddr && len_mask == wp->len_mask
 369                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 370            cpu_watchpoint_remove_by_ref(env, wp);
 371            return 0;
 372        }
 373    }
 374    return -ENOENT;
 375}
 376
 377/* Remove a specific watchpoint by reference.  */
 378void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
 379{
 380    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
 381
 382    tlb_flush_page(env, watchpoint->vaddr);
 383
 384    g_free(watchpoint);
 385}
 386
 387/* Remove all matching watchpoints.  */
 388void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
 389{
 390    CPUWatchpoint *wp, *next;
 391
 392    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
 393        if (wp->flags & mask)
 394            cpu_watchpoint_remove_by_ref(env, wp);
 395    }
 396}
 397#endif
 398
 399/* Add a breakpoint.  */
 400int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
 401                          CPUBreakpoint **breakpoint)
 402{
 403#if defined(TARGET_HAS_ICE)
 404    CPUBreakpoint *bp;
 405
 406    bp = g_malloc(sizeof(*bp));
 407
 408    bp->pc = pc;
 409    bp->flags = flags;
 410
 411    /* keep all GDB-injected breakpoints in front */
 412    if (flags & BP_GDB)
 413        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
 414    else
 415        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
 416
 417    breakpoint_invalidate(env, pc);
 418
 419    if (breakpoint)
 420        *breakpoint = bp;
 421    return 0;
 422#else
 423    return -ENOSYS;
 424#endif
 425}
 426
 427/* Remove a specific breakpoint.  */
 428int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
 429{
 430#if defined(TARGET_HAS_ICE)
 431    CPUBreakpoint *bp;
 432
 433    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
 434        if (bp->pc == pc && bp->flags == flags) {
 435            cpu_breakpoint_remove_by_ref(env, bp);
 436            return 0;
 437        }
 438    }
 439    return -ENOENT;
 440#else
 441    return -ENOSYS;
 442#endif
 443}
 444
 445/* Remove a specific breakpoint by reference.  */
 446void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
 447{
 448#if defined(TARGET_HAS_ICE)
 449    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
 450
 451    breakpoint_invalidate(env, breakpoint->pc);
 452
 453    g_free(breakpoint);
 454#endif
 455}
 456
 457/* Remove all matching breakpoints. */
 458void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
 459{
 460#if defined(TARGET_HAS_ICE)
 461    CPUBreakpoint *bp, *next;
 462
 463    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
 464        if (bp->flags & mask)
 465            cpu_breakpoint_remove_by_ref(env, bp);
 466    }
 467#endif
 468}
 469
 470/* enable or disable single step mode. EXCP_DEBUG is returned by the
 471   CPU loop after each instruction */
 472void cpu_single_step(CPUArchState *env, int enabled)
 473{
 474#if defined(TARGET_HAS_ICE)
 475    if (env->singlestep_enabled != enabled) {
 476        env->singlestep_enabled = enabled;
 477        if (kvm_enabled())
 478            kvm_update_guest_debug(env, 0);
 479        else {
 480            /* must flush all the translated code to avoid inconsistencies */
 481            /* XXX: only flush what is necessary */
 482            tb_flush(env);
 483        }
 484    }
 485#endif
 486}
 487
 488void cpu_reset_interrupt(CPUArchState *env, int mask)
 489{
 490    env->interrupt_request &= ~mask;
 491}
 492
 493void cpu_exit(CPUArchState *env)
 494{
 495    env->exit_request = 1;
 496    cpu_unlink_tb(env);
 497}
 498
 499void cpu_abort(CPUArchState *env, const char *fmt, ...)
 500{
 501    va_list ap;
 502    va_list ap2;
 503
 504    va_start(ap, fmt);
 505    va_copy(ap2, ap);
 506    fprintf(stderr, "qemu: fatal: ");
 507    vfprintf(stderr, fmt, ap);
 508    fprintf(stderr, "\n");
 509    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 510    if (qemu_log_enabled()) {
 511        qemu_log("qemu: fatal: ");
 512        qemu_log_vprintf(fmt, ap2);
 513        qemu_log("\n");
 514        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 515        qemu_log_flush();
 516        qemu_log_close();
 517    }
 518    va_end(ap2);
 519    va_end(ap);
 520#if defined(CONFIG_USER_ONLY)
 521    {
 522        struct sigaction act;
 523        sigfillset(&act.sa_mask);
 524        act.sa_handler = SIG_DFL;
 525        sigaction(SIGABRT, &act, NULL);
 526    }
 527#endif
 528    abort();
 529}
 530
 531CPUArchState *cpu_copy(CPUArchState *env)
 532{
 533    CPUArchState *new_env = cpu_init(env->cpu_model_str);
 534    CPUArchState *next_cpu = new_env->next_cpu;
 535#if defined(TARGET_HAS_ICE)
 536    CPUBreakpoint *bp;
 537    CPUWatchpoint *wp;
 538#endif
 539
 540    memcpy(new_env, env, sizeof(CPUArchState));
 541
 542    /* Preserve chaining. */
 543    new_env->next_cpu = next_cpu;
 544
 545    /* Clone all break/watchpoints.
 546       Note: Once we support ptrace with hw-debug register access, make sure
 547       BP_CPU break/watchpoints are handled correctly on clone. */
 548    QTAILQ_INIT(&new_env->breakpoints);
 549    QTAILQ_INIT(&new_env->watchpoints);
 550#if defined(TARGET_HAS_ICE)
 551    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
 552        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
 553    }
 554    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
 555        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
 556                              wp->flags, NULL);
 557    }
 558#endif
 559
 560    return new_env;
 561}
 562
 563#if !defined(CONFIG_USER_ONLY)
 564static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
 565                                      uintptr_t length)
 566{
 567    uintptr_t start1;
 568
 569    /* we modify the TLB cache so that the dirty bit will be set again
 570       when accessing the range */
 571    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
 572    /* Check that we don't span multiple blocks - this breaks the
 573       address comparisons below.  */
 574    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
 575            != (end - 1) - start) {
 576        abort();
 577    }
 578    cpu_tlb_reset_dirty_all(start1, length);
 579
 580}
 581
 582/* Note: start and end must be within the same ram block.  */
 583void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
 584                                     int dirty_flags)
 585{
 586    uintptr_t length;
 587
 588    start &= TARGET_PAGE_MASK;
 589    end = TARGET_PAGE_ALIGN(end);
 590
 591    length = end - start;
 592    if (length == 0)
 593        return;
 594    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
 595
 596    if (tcg_enabled()) {
 597        tlb_reset_dirty_range_all(start, end, length);
 598    }
 599}
 600
 601static int cpu_physical_memory_set_dirty_tracking(int enable)
 602{
 603    int ret = 0;
 604    in_migration = enable;
 605    return ret;
 606}
 607
 608hwaddr memory_region_section_get_iotlb(CPUArchState *env,
 609                                                   MemoryRegionSection *section,
 610                                                   target_ulong vaddr,
 611                                                   hwaddr paddr,
 612                                                   int prot,
 613                                                   target_ulong *address)
 614{
 615    hwaddr iotlb;
 616    CPUWatchpoint *wp;
 617
 618    if (memory_region_is_ram(section->mr)) {
 619        /* Normal RAM.  */
 620        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
 621            + memory_region_section_addr(section, paddr);
 622        if (!section->readonly) {
 623            iotlb |= phys_section_notdirty;
 624        } else {
 625            iotlb |= phys_section_rom;
 626        }
 627    } else {
 628        /* IO handlers are currently passed a physical address.
 629           It would be nice to pass an offset from the base address
 630           of that region.  This would avoid having to special case RAM,
 631           and avoid full address decoding in every device.
 632           We can't use the high bits of pd for this because
 633           IO_MEM_ROMD uses these as a ram address.  */
 634        iotlb = section - phys_sections;
 635        iotlb += memory_region_section_addr(section, paddr);
 636    }
 637
 638    /* Make accesses to pages with watchpoints go via the
 639       watchpoint trap routines.  */
 640    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
 641        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
 642            /* Avoid trapping reads of pages with a write breakpoint. */
 643            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
 644                iotlb = phys_section_watch + paddr;
 645                *address |= TLB_MMIO;
 646                break;
 647            }
 648        }
 649    }
 650
 651    return iotlb;
 652}
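
/*
 * The iotlb value built above is overloaded.  For RAM-backed sections it is
 * the page-aligned ram_addr_t of the page with the index of the notdirty
 * (writable RAM) or ROM (readonly) section OR'ed into the low bits, so that
 * stores still pass through the dirty-tracking or read-only handlers.  For
 * MMIO sections it is the section's index in phys_sections[] plus the page's
 * offset, and iotlb_to_region() later recovers the MemoryRegion from the
 * sub-page bits.  Pages carrying an armed watchpoint are forced through
 * io_mem_watch by setting TLB_MMIO on the TLB entry.
 */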
 653#endif /* !defined(CONFIG_USER_ONLY) */
 654
 655#if !defined(CONFIG_USER_ONLY)
 656
 657#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 658typedef struct subpage_t {
 659    MemoryRegion iomem;
 660    hwaddr base;
 661    uint16_t sub_section[TARGET_PAGE_SIZE];
 662} subpage_t;
 663
 664static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
 665                             uint16_t section);
 666static subpage_t *subpage_init(hwaddr base);
 667static void destroy_page_desc(uint16_t section_index)
 668{
 669    MemoryRegionSection *section = &phys_sections[section_index];
 670    MemoryRegion *mr = section->mr;
 671
 672    if (mr->subpage) {
 673        subpage_t *subpage = container_of(mr, subpage_t, iomem);
 674        memory_region_destroy(&subpage->iomem);
 675        g_free(subpage);
 676    }
 677}
 678
 679static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
 680{
 681    unsigned i;
 682    PhysPageEntry *p;
 683
 684    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 685        return;
 686    }
 687
 688    p = phys_map_nodes[lp->ptr];
 689    for (i = 0; i < L2_SIZE; ++i) {
 690        if (!p[i].is_leaf) {
 691            destroy_l2_mapping(&p[i], level - 1);
 692        } else {
 693            destroy_page_desc(p[i].ptr);
 694        }
 695    }
 696    lp->is_leaf = 0;
 697    lp->ptr = PHYS_MAP_NODE_NIL;
 698}
 699
 700static void destroy_all_mappings(AddressSpaceDispatch *d)
 701{
 702    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
 703    phys_map_nodes_reset();
 704}
 705
 706static uint16_t phys_section_add(MemoryRegionSection *section)
 707{
 708    if (phys_sections_nb == phys_sections_nb_alloc) {
 709        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
 710        phys_sections = g_renew(MemoryRegionSection, phys_sections,
 711                                phys_sections_nb_alloc);
 712    }
 713    phys_sections[phys_sections_nb] = *section;
 714    return phys_sections_nb++;
 715}
 716
 717static void phys_sections_clear(void)
 718{
 719    phys_sections_nb = 0;
 720}
 721
 722static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 723{
 724    subpage_t *subpage;
 725    hwaddr base = section->offset_within_address_space
 726        & TARGET_PAGE_MASK;
 727    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
 728    MemoryRegionSection subsection = {
 729        .offset_within_address_space = base,
 730        .size = TARGET_PAGE_SIZE,
 731    };
 732    hwaddr start, end;
 733
 734    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
 735
 736    if (!(existing->mr->subpage)) {
 737        subpage = subpage_init(base);
 738        subsection.mr = &subpage->iomem;
 739        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
 740                      phys_section_add(&subsection));
 741    } else {
 742        subpage = container_of(existing->mr, subpage_t, iomem);
 743    }
 744    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
 745    end = start + section->size - 1;
 746    subpage_register(subpage, start, end, phys_section_add(section));
 747}
 748
 749
 750static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 751{
 752    hwaddr start_addr = section->offset_within_address_space;
 753    ram_addr_t size = section->size;
 754    hwaddr addr;
 755    uint16_t section_index = phys_section_add(section);
 756
 757    assert(size);
 758
 759    addr = start_addr;
 760    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
 761                  section_index);
 762}
 763
 764static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 765{
 766    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
 767    MemoryRegionSection now = *section, remain = *section;
 768
 769    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
 770        || (now.size < TARGET_PAGE_SIZE)) {
 771        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
 772                       - now.offset_within_address_space,
 773                       now.size);
 774        register_subpage(d, &now);
 775        remain.size -= now.size;
 776        remain.offset_within_address_space += now.size;
 777        remain.offset_within_region += now.size;
 778    }
 779    while (remain.size >= TARGET_PAGE_SIZE) {
 780        now = remain;
 781        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
 782            now.size = TARGET_PAGE_SIZE;
 783            register_subpage(d, &now);
 784        } else {
 785            now.size &= TARGET_PAGE_MASK;
 786            register_multipage(d, &now);
 787        }
 788        remain.size -= now.size;
 789        remain.offset_within_address_space += now.size;
 790        remain.offset_within_region += now.size;
 791    }
 792    now = remain;
 793    if (now.size) {
 794        register_subpage(d, &now);
 795    }
 796}
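
/*
 * Sketch of how mem_add() carves up a section, assuming 4 KiB target pages
 * (TARGET_PAGE_SIZE is target dependent): a section covering guest physical
 * addresses 0x1800..0x47ff gets a head subpage for 0x1800..0x1fff, the full
 * pages 0x2000..0x3fff go through register_multipage() provided the offset
 * within the region is then page aligned (otherwise they are registered one
 * page at a time as subpages), and 0x4000..0x47ff becomes a tail subpage.
 * Sections that are page aligned at both ends never touch the subpage path.
 */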
 797
 798void qemu_flush_coalesced_mmio_buffer(void)
 799{
 800    if (kvm_enabled())
 801        kvm_flush_coalesced_mmio_buffer();
 802}
 803
 804void qemu_mutex_lock_ramlist(void)
 805{
 806    qemu_mutex_lock(&ram_list.mutex);
 807}
 808
 809void qemu_mutex_unlock_ramlist(void)
 810{
 811    qemu_mutex_unlock(&ram_list.mutex);
 812}
 813
 814#if defined(__linux__) && !defined(TARGET_S390X)
 815
 816#include <sys/vfs.h>
 817
 818#define HUGETLBFS_MAGIC       0x958458f6
 819
 820static long gethugepagesize(const char *path)
 821{
 822    struct statfs fs;
 823    int ret;
 824
 825    do {
 826        ret = statfs(path, &fs);
 827    } while (ret != 0 && errno == EINTR);
 828
 829    if (ret != 0) {
 830        perror(path);
 831        return 0;
 832    }
 833
 834    if (fs.f_type != HUGETLBFS_MAGIC)
 835        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
 836
 837    return fs.f_bsize;
 838}
 839
 840static void *file_ram_alloc(RAMBlock *block,
 841                            ram_addr_t memory,
 842                            const char *path)
 843{
 844    char *filename;
 845    void *area;
 846    int fd;
 847#ifdef MAP_POPULATE
 848    int flags;
 849#endif
 850    unsigned long hpagesize;
 851
 852    hpagesize = gethugepagesize(path);
 853    if (!hpagesize) {
 854        return NULL;
 855    }
 856
 857    if (memory < hpagesize) {
 858        return NULL;
 859    }
 860
 861    if (kvm_enabled() && !kvm_has_sync_mmu()) {
 862        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
 863        return NULL;
 864    }
 865
 866    filename = g_strdup_printf("%s/qemu_back_mem.XXXXXX", path);
 867
 868    fd = mkstemp(filename);
 869    if (fd < 0) {
 870        perror("unable to create backing store for hugepages");
 871        g_free(filename);
 872        return NULL;
 873    }
 874    unlink(filename);
 875    g_free(filename);
 876
 877    memory = (memory+hpagesize-1) & ~(hpagesize-1);
 878
 879    /*
 880     * ftruncate is not supported by hugetlbfs in older
 881     * hosts, so don't bother bailing out on errors.
 882     * If anything goes wrong with it under other filesystems,
 883     * mmap will fail.
 884     */
 885    if (ftruncate(fd, memory))
 886        perror("ftruncate");
 887
 888#ifdef MAP_POPULATE
 889    /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
 890     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
 891     * to sidestep this quirk.
 892     */
 893    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
 894    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
 895#else
 896    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 897#endif
 898    if (area == MAP_FAILED) {
 899        perror("file_ram_alloc: can't mmap RAM pages");
 900        close(fd);
 901        return (NULL);
 902    }
 903    block->fd = fd;
 904    return area;
 905}
 906#endif
 907
 908static ram_addr_t find_ram_offset(ram_addr_t size)
 909{
 910    RAMBlock *block, *next_block;
 911    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
 912
 913    if (QTAILQ_EMPTY(&ram_list.blocks))
 914        return 0;
 915
 916    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 917        ram_addr_t end, next = RAM_ADDR_MAX;
 918
 919        end = block->offset + block->length;
 920
 921        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
 922            if (next_block->offset >= end) {
 923                next = MIN(next, next_block->offset);
 924            }
 925        }
 926        if (next - end >= size && next - end < mingap) {
 927            offset = end;
 928            mingap = next - end;
 929        }
 930    }
 931
 932    if (offset == RAM_ADDR_MAX) {
 933        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
 934                (uint64_t)size);
 935        abort();
 936    }
 937
 938    return offset;
 939}
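
/*
 * find_ram_offset() is a best-fit search: for each existing block it measures
 * the gap up to the next higher block and keeps the smallest gap that still
 * fits the request, which keeps the ram_addr_t space compact even after
 * blocks have been freed and reallocated with different sizes.
 */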
 940
 941ram_addr_t last_ram_offset(void)
 942{
 943    RAMBlock *block;
 944    ram_addr_t last = 0;
 945
 946    QTAILQ_FOREACH(block, &ram_list.blocks, next)
 947        last = MAX(last, block->offset + block->length);
 948
 949    return last;
 950}
 951
 952static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
 953{
 954    int ret;
 955    QemuOpts *machine_opts;
 956
 957    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
 958    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
 959    if (machine_opts &&
 960        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
 961        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
 962        if (ret) {
 963            perror("qemu_madvise");
 964            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
 965                            "but dump_guest_core=off specified\n");
 966        }
 967    }
 968}
 969
 970void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
 971{
 972    RAMBlock *new_block, *block;
 973
 974    new_block = NULL;
 975    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 976        if (block->offset == addr) {
 977            new_block = block;
 978            break;
 979        }
 980    }
 981    assert(new_block);
 982    assert(!new_block->idstr[0]);
 983
 984    if (dev) {
 985        char *id = qdev_get_dev_path(dev);
 986        if (id) {
 987            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
 988            g_free(id);
 989        }
 990    }
 991    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 992
 993    /* This assumes the iothread lock is taken here too.  */
 994    qemu_mutex_lock_ramlist();
 995    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 996        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
 997            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
 998                    new_block->idstr);
 999            abort();
1000        }
1001    }
1002    qemu_mutex_unlock_ramlist();
1003}
1004
1005static int memory_try_enable_merging(void *addr, size_t len)
1006{
1007    QemuOpts *opts;
1008
1009    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1010    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1011        /* disabled by the user */
1012        return 0;
1013    }
1014
1015    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1016}
1017
1018ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1019                                   MemoryRegion *mr)
1020{
1021    RAMBlock *block, *new_block;
1022
1023    size = TARGET_PAGE_ALIGN(size);
1024    new_block = g_malloc0(sizeof(*new_block));
1025
1026    /* This assumes the iothread lock is taken here too.  */
1027    qemu_mutex_lock_ramlist();
1028    new_block->mr = mr;
1029    new_block->offset = find_ram_offset(size);
1030    if (host) {
1031        new_block->host = host;
1032        new_block->flags |= RAM_PREALLOC_MASK;
1033    } else {
1034        if (mem_path) {
1035#if defined (__linux__) && !defined(TARGET_S390X)
1036            new_block->host = file_ram_alloc(new_block, size, mem_path);
1037            if (!new_block->host) {
1038                new_block->host = qemu_vmalloc(size);
1039                memory_try_enable_merging(new_block->host, size);
1040            }
1041#else
1042            fprintf(stderr, "-mem-path option unsupported\n");
1043            exit(1);
1044#endif
1045        } else {
1046            if (xen_enabled()) {
1047                xen_ram_alloc(new_block->offset, size, mr);
1048            } else if (kvm_enabled()) {
1049                /* some s390/kvm configurations have special constraints */
1050                new_block->host = kvm_vmalloc(size);
1051            } else {
1052                new_block->host = qemu_vmalloc(size);
1053            }
1054            memory_try_enable_merging(new_block->host, size);
1055        }
1056    }
1057    new_block->length = size;
1058
1059    /* Keep the list sorted from biggest to smallest block.  */
1060    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1061        if (block->length < new_block->length) {
1062            break;
1063        }
1064    }
1065    if (block) {
1066        QTAILQ_INSERT_BEFORE(block, new_block, next);
1067    } else {
1068        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1069    }
1070    ram_list.mru_block = NULL;
1071
1072    ram_list.version++;
1073    qemu_mutex_unlock_ramlist();
1074
1075    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1076                                       last_ram_offset() >> TARGET_PAGE_BITS);
1077    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1078           0, size >> TARGET_PAGE_BITS);
1079    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1080
1081    qemu_ram_setup_dump(new_block->host, size);
1082    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1083
1084    if (kvm_enabled())
1085        kvm_setup_guest_memory(new_block->host, size);
1086
1087    return new_block->offset;
1088}
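
/*
 * Minimal sketch of the allocation path (hypothetical caller; device and
 * board code normally reaches this through memory_region_init_ram() rather
 * than calling it directly):
 *
 *     MemoryRegion *mr = ...;   // region that will own the RAM
 *     ram_addr_t offset = qemu_ram_alloc(16 * 1024 * 1024, mr);
 *     void *host = qemu_get_ram_ptr(offset);
 *
 * The block list is kept sorted from biggest to smallest, presumably so that
 * lookups walking the list hit large blocks first, and ram_list.mru_block is
 * invalidated whenever the list changes.
 */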
1089
1090ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1091{
1092    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1093}
1094
1095void qemu_ram_free_from_ptr(ram_addr_t addr)
1096{
1097    RAMBlock *block;
1098
1099    /* This assumes the iothread lock is taken here too.  */
1100    qemu_mutex_lock_ramlist();
1101    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1102        if (addr == block->offset) {
1103            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1104            ram_list.mru_block = NULL;
1105            ram_list.version++;
1106            g_free(block);
1107            break;
1108        }
1109    }
1110    qemu_mutex_unlock_ramlist();
1111}
1112
1113void qemu_ram_free(ram_addr_t addr)
1114{
1115    RAMBlock *block;
1116
1117    /* This assumes the iothread lock is taken here too.  */
1118    qemu_mutex_lock_ramlist();
1119    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1120        if (addr == block->offset) {
1121            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1122            ram_list.mru_block = NULL;
1123            ram_list.version++;
1124            if (block->flags & RAM_PREALLOC_MASK) {
1125                ;
1126            } else if (mem_path) {
1127#if defined (__linux__) && !defined(TARGET_S390X)
1128                if (block->fd) {
1129                    munmap(block->host, block->length);
1130                    close(block->fd);
1131                } else {
1132                    qemu_vfree(block->host);
1133                }
1134#else
1135                abort();
1136#endif
1137            } else {
1138#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1139                munmap(block->host, block->length);
1140#else
1141                if (xen_enabled()) {
1142                    xen_invalidate_map_cache_entry(block->host);
1143                } else {
1144                    qemu_vfree(block->host);
1145                }
1146#endif
1147            }
1148            g_free(block);
1149            break;
1150        }
1151    }
1152    qemu_mutex_unlock_ramlist();
1153
1154}
1155
1156#ifndef _WIN32
1157void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1158{
1159    RAMBlock *block;
1160    ram_addr_t offset;
1161    int flags;
1162    void *area, *vaddr;
1163
1164    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1165        offset = addr - block->offset;
1166        if (offset < block->length) {
1167            vaddr = block->host + offset;
1168            if (block->flags & RAM_PREALLOC_MASK) {
1169                ;
1170            } else {
1171                flags = MAP_FIXED;
1172                munmap(vaddr, length);
1173                if (mem_path) {
1174#if defined(__linux__) && !defined(TARGET_S390X)
1175                    if (block->fd) {
1176#ifdef MAP_POPULATE
1177                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1178                            MAP_PRIVATE;
1179#else
1180                        flags |= MAP_PRIVATE;
1181#endif
1182                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1183                                    flags, block->fd, offset);
1184                    } else {
1185                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1186                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1187                                    flags, -1, 0);
1188                    }
1189#else
1190                    abort();
1191#endif
1192                } else {
1193#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1194                    flags |= MAP_SHARED | MAP_ANONYMOUS;
1195                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1196                                flags, -1, 0);
1197#else
1198                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1199                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1200                                flags, -1, 0);
1201#endif
1202                }
1203                if (area != vaddr) {
1204                    fprintf(stderr, "Could not remap addr: "
1205                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1206                            length, addr);
1207                    exit(1);
1208                }
1209                memory_try_enable_merging(vaddr, length);
1210                qemu_ram_setup_dump(vaddr, length);
1211            }
1212            return;
1213        }
1214    }
1215}
1216#endif /* !_WIN32 */
1217
1218/* Return a host pointer to ram allocated with qemu_ram_alloc.
1219   With the exception of the softmmu code in this file, this should
1220   only be used for local memory (e.g. video ram) that the device owns,
1221   and knows it isn't going to access beyond the end of the block.
1222
1223   It should not be used for general purpose DMA.
1224   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1225 */
1226void *qemu_get_ram_ptr(ram_addr_t addr)
1227{
1228    RAMBlock *block;
1229
1230    /* The list is protected by the iothread lock here.  */
1231    block = ram_list.mru_block;
1232    if (block && addr - block->offset < block->length) {
1233        goto found;
1234    }
1235    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1236        if (addr - block->offset < block->length) {
1237            goto found;
1238        }
1239    }
1240
1241    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1242    abort();
1243
1244found:
1245    ram_list.mru_block = block;
1246    if (xen_enabled()) {
1247        /* We need to check if the requested address is in the RAM
1248         * because we don't want to map the entire memory in QEMU.
1249         * In that case just map until the end of the page.
1250         */
1251        if (block->offset == 0) {
1252            return xen_map_cache(addr, 0, 0);
1253        } else if (block->host == NULL) {
1254            block->host =
1255                xen_map_cache(block->offset, block->length, 1);
1256        }
1257    }
1258    return block->host + (addr - block->offset);
1259}
1260
1261/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1262 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1263 *
1264 * ??? Is this still necessary?
1265 */
1266static void *qemu_safe_ram_ptr(ram_addr_t addr)
1267{
1268    RAMBlock *block;
1269
1270    /* The list is protected by the iothread lock here.  */
1271    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1272        if (addr - block->offset < block->length) {
1273            if (xen_enabled()) {
1274                /* We need to check if the requested address is in the RAM
1275                 * because we don't want to map the entire memory in QEMU.
1276                 * In that case just map until the end of the page.
1277                 */
1278                if (block->offset == 0) {
1279                    return xen_map_cache(addr, 0, 0);
1280                } else if (block->host == NULL) {
1281                    block->host =
1282                        xen_map_cache(block->offset, block->length, 1);
1283                }
1284            }
1285            return block->host + (addr - block->offset);
1286        }
1287    }
1288
1289    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1290    abort();
1291
1292    return NULL;
1293}
1294
1295/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1296 * but takes a size argument */
1297static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1298{
1299    if (*size == 0) {
1300        return NULL;
1301    }
1302    if (xen_enabled()) {
1303        return xen_map_cache(addr, *size, 1);
1304    } else {
1305        RAMBlock *block;
1306
1307        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1308            if (addr - block->offset < block->length) {
1309                if (addr - block->offset + *size > block->length)
1310                    *size = block->length - addr + block->offset;
1311                return block->host + (addr - block->offset);
1312            }
1313        }
1314
1315        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1316        abort();
1317    }
1318}
1319
1320void qemu_put_ram_ptr(void *addr)
1321{
1322    trace_qemu_put_ram_ptr(addr);
1323}
1324
1325int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1326{
1327    RAMBlock *block;
1328    uint8_t *host = ptr;
1329
1330    if (xen_enabled()) {
1331        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1332        return 0;
1333    }
1334
1335    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1336        /* This case appears when the block is not mapped. */
1337        if (block->host == NULL) {
1338            continue;
1339        }
1340        if (host - block->host < block->length) {
1341            *ram_addr = block->offset + (host - block->host);
1342            return 0;
1343        }
1344    }
1345
1346    return -1;
1347}
1348
1349/* Some of the softmmu routines need to translate from a host pointer
1350   (typically a TLB entry) back to a ram offset.  */
1351ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1352{
1353    ram_addr_t ram_addr;
1354
1355    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1356        fprintf(stderr, "Bad ram pointer %p\n", ptr);
1357        abort();
1358    }
1359    return ram_addr;
1360}
1361
1362static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1363                                    unsigned size)
1364{
1365#ifdef DEBUG_UNASSIGNED
1366    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1367#endif
1368#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1369    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1370#endif
1371    return 0;
1372}
1373
1374static void unassigned_mem_write(void *opaque, hwaddr addr,
1375                                 uint64_t val, unsigned size)
1376{
1377#ifdef DEBUG_UNASSIGNED
1378    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1379#endif
1380#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1381    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1382#endif
1383}
1384
1385static const MemoryRegionOps unassigned_mem_ops = {
1386    .read = unassigned_mem_read,
1387    .write = unassigned_mem_write,
1388    .endianness = DEVICE_NATIVE_ENDIAN,
1389};
1390
1391static uint64_t error_mem_read(void *opaque, hwaddr addr,
1392                               unsigned size)
1393{
1394    abort();
1395}
1396
1397static void error_mem_write(void *opaque, hwaddr addr,
1398                            uint64_t value, unsigned size)
1399{
1400    abort();
1401}
1402
1403static const MemoryRegionOps error_mem_ops = {
1404    .read = error_mem_read,
1405    .write = error_mem_write,
1406    .endianness = DEVICE_NATIVE_ENDIAN,
1407};
1408
1409static const MemoryRegionOps rom_mem_ops = {
1410    .read = error_mem_read,
1411    .write = unassigned_mem_write,
1412    .endianness = DEVICE_NATIVE_ENDIAN,
1413};
1414
1415static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1416                               uint64_t val, unsigned size)
1417{
1418    int dirty_flags;
1419    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1420    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1421#if !defined(CONFIG_USER_ONLY)
1422        tb_invalidate_phys_page_fast(ram_addr, size);
1423        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1424#endif
1425    }
1426    switch (size) {
1427    case 1:
1428        stb_p(qemu_get_ram_ptr(ram_addr), val);
1429        break;
1430    case 2:
1431        stw_p(qemu_get_ram_ptr(ram_addr), val);
1432        break;
1433    case 4:
1434        stl_p(qemu_get_ram_ptr(ram_addr), val);
1435        break;
1436    default:
1437        abort();
1438    }
1439    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1440    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1441    /* we remove the notdirty callback only if the code has been
1442       flushed */
1443    if (dirty_flags == 0xff)
1444        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1445}
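
/*
 * This is how TCG keeps translated code coherent with guest stores: writes
 * land here while the page is not fully dirty, any translated blocks on the
 * page are invalidated (unless CODE_DIRTY_FLAG is already set), the store is
 * performed on the underlying RAM via qemu_get_ram_ptr(), the remaining
 * dirty bits are set, and once the page is completely dirty (0xff) the TLB
 * entry is switched back to a plain RAM mapping so later writes take the
 * fast path.
 */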
1446
1447static const MemoryRegionOps notdirty_mem_ops = {
1448    .read = error_mem_read,
1449    .write = notdirty_mem_write,
1450    .endianness = DEVICE_NATIVE_ENDIAN,
1451};
1452
1453/* Generate a debug exception if a watchpoint has been hit.  */
1454static void check_watchpoint(int offset, int len_mask, int flags)
1455{
1456    CPUArchState *env = cpu_single_env;
1457    target_ulong pc, cs_base;
1458    target_ulong vaddr;
1459    CPUWatchpoint *wp;
1460    int cpu_flags;
1461
1462    if (env->watchpoint_hit) {
1463        /* We re-entered the check after replacing the TB. Now raise
1464         * the debug interrupt so that it will trigger after the
1465         * current instruction. */
1466        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1467        return;
1468    }
1469    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1470    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1471        if ((vaddr == (wp->vaddr & len_mask) ||
1472             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1473            wp->flags |= BP_WATCHPOINT_HIT;
1474            if (!env->watchpoint_hit) {
1475                env->watchpoint_hit = wp;
1476                tb_check_watchpoint(env);
1477                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1478                    env->exception_index = EXCP_DEBUG;
1479                    cpu_loop_exit(env);
1480                } else {
1481                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1482                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1483                    cpu_resume_from_signal(env, NULL);
1484                }
1485            }
1486        } else {
1487            wp->flags &= ~BP_WATCHPOINT_HIT;
1488        }
1489    }
1490}
1491
1492/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1493   so these check for a hit then pass through to the normal out-of-line
1494   phys routines.  */
1495static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1496                               unsigned size)
1497{
1498    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1499    switch (size) {
1500    case 1: return ldub_phys(addr);
1501    case 2: return lduw_phys(addr);
1502    case 4: return ldl_phys(addr);
1503    default: abort();
1504    }
1505}
1506
1507static void watch_mem_write(void *opaque, hwaddr addr,
1508                            uint64_t val, unsigned size)
1509{
1510    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1511    switch (size) {
1512    case 1:
1513        stb_phys(addr, val);
1514        break;
1515    case 2:
1516        stw_phys(addr, val);
1517        break;
1518    case 4:
1519        stl_phys(addr, val);
1520        break;
1521    default: abort();
1522    }
1523}
1524
1525static const MemoryRegionOps watch_mem_ops = {
1526    .read = watch_mem_read,
1527    .write = watch_mem_write,
1528    .endianness = DEVICE_NATIVE_ENDIAN,
1529};
1530
1531static uint64_t subpage_read(void *opaque, hwaddr addr,
1532                             unsigned len)
1533{
1534    subpage_t *mmio = opaque;
1535    unsigned int idx = SUBPAGE_IDX(addr);
1536    MemoryRegionSection *section;
1537#if defined(DEBUG_SUBPAGE)
1538    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1539           mmio, len, addr, idx);
1540#endif
1541
1542    section = &phys_sections[mmio->sub_section[idx]];
1543    addr += mmio->base;
1544    addr -= section->offset_within_address_space;
1545    addr += section->offset_within_region;
1546    return io_mem_read(section->mr, addr, len);
1547}
1548
1549static void subpage_write(void *opaque, hwaddr addr,
1550                          uint64_t value, unsigned len)
1551{
1552    subpage_t *mmio = opaque;
1553    unsigned int idx = SUBPAGE_IDX(addr);
1554    MemoryRegionSection *section;
1555#if defined(DEBUG_SUBPAGE)
1556    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1557           " idx %d value %"PRIx64"\n",
1558           __func__, mmio, len, addr, idx, value);
1559#endif
1560
1561    section = &phys_sections[mmio->sub_section[idx]];
1562    addr += mmio->base;
1563    addr -= section->offset_within_address_space;
1564    addr += section->offset_within_region;
1565    io_mem_write(section->mr, addr, value, len);
1566}
1567
1568static const MemoryRegionOps subpage_ops = {
1569    .read = subpage_read,
1570    .write = subpage_write,
1571    .endianness = DEVICE_NATIVE_ENDIAN,
1572};
1573
1574static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1575                                 unsigned size)
1576{
1577    ram_addr_t raddr = addr;
1578    void *ptr = qemu_get_ram_ptr(raddr);
1579    switch (size) {
1580    case 1: return ldub_p(ptr);
1581    case 2: return lduw_p(ptr);
1582    case 4: return ldl_p(ptr);
1583    default: abort();
1584    }
1585}
1586
1587static void subpage_ram_write(void *opaque, hwaddr addr,
1588                              uint64_t value, unsigned size)
1589{
1590    ram_addr_t raddr = addr;
1591    void *ptr = qemu_get_ram_ptr(raddr);
1592    switch (size) {
1593    case 1: return stb_p(ptr, value);
1594    case 2: return stw_p(ptr, value);
1595    case 4: return stl_p(ptr, value);
1596    default: abort();
1597    }
1598}
1599
1600static const MemoryRegionOps subpage_ram_ops = {
1601    .read = subpage_ram_read,
1602    .write = subpage_ram_write,
1603    .endianness = DEVICE_NATIVE_ENDIAN,
1604};
1605
1606static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1607                             uint16_t section)
1608{
1609    int idx, eidx;
1610
1611    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1612        return -1;
1613    idx = SUBPAGE_IDX(start);
1614    eidx = SUBPAGE_IDX(end);
1615#if defined(DEBUG_SUBPAGE)
1616    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1617           mmio, start, end, idx, eidx, section);
1618#endif
1619    if (memory_region_is_ram(phys_sections[section].mr)) {
1620        MemoryRegionSection new_section = phys_sections[section];
1621        new_section.mr = &io_mem_subpage_ram;
1622        section = phys_section_add(&new_section);
1623    }
1624    for (; idx <= eidx; idx++) {
1625        mmio->sub_section[idx] = section;
1626    }
1627
1628    return 0;
1629}
1630
1631static subpage_t *subpage_init(hwaddr base)
1632{
1633    subpage_t *mmio;
1634
1635    mmio = g_malloc0(sizeof(subpage_t));
1636
1637    mmio->base = base;
1638    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1639                          "subpage", TARGET_PAGE_SIZE);
1640    mmio->iomem.subpage = true;
1641#if defined(DEBUG_SUBPAGE)
1642    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1643           mmio, base, TARGET_PAGE_SIZE);
1644#endif
1645    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1646
1647    return mmio;
1648}
1649
1650static uint16_t dummy_section(MemoryRegion *mr)
1651{
1652    MemoryRegionSection section = {
1653        .mr = mr,
1654        .offset_within_address_space = 0,
1655        .offset_within_region = 0,
1656        .size = UINT64_MAX,
1657    };
1658
1659    return phys_section_add(&section);
1660}
1661
1662MemoryRegion *iotlb_to_region(hwaddr index)
1663{
1664    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1665}
1666
1667static void io_mem_init(void)
1668{
1669    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1670    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1671    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1672                          "unassigned", UINT64_MAX);
1673    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1674                          "notdirty", UINT64_MAX);
1675    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1676                          "subpage-ram", UINT64_MAX);
1677    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1678                          "watch", UINT64_MAX);
1679}
1680
1681static void mem_begin(MemoryListener *listener)
1682{
1683    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1684
1685    destroy_all_mappings(d);
1686    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1687}
1688
1689static void core_begin(MemoryListener *listener)
1690{
1691    phys_sections_clear();
1692    phys_section_unassigned = dummy_section(&io_mem_unassigned);
1693    phys_section_notdirty = dummy_section(&io_mem_notdirty);
1694    phys_section_rom = dummy_section(&io_mem_rom);
1695    phys_section_watch = dummy_section(&io_mem_watch);
1696}
1697
1698static void tcg_commit(MemoryListener *listener)
1699{
1700    CPUArchState *env;
1701
1702    /* since each CPU stores ram addresses in its TLB cache, we must
1703       reset the modified entries */
1704    /* XXX: slow ! */
1705    for(env = first_cpu; env != NULL; env = env->next_cpu) {
1706        tlb_flush(env, 1);
1707    }
1708}
1709
1710static void core_log_global_start(MemoryListener *listener)
1711{
1712    cpu_physical_memory_set_dirty_tracking(1);
1713}
1714
1715static void core_log_global_stop(MemoryListener *listener)
1716{
1717    cpu_physical_memory_set_dirty_tracking(0);
1718}
1719
1720static void io_region_add(MemoryListener *listener,
1721                          MemoryRegionSection *section)
1722{
1723    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1724
1725    mrio->mr = section->mr;
1726    mrio->offset = section->offset_within_region;
1727    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1728                 section->offset_within_address_space, section->size);
1729    ioport_register(&mrio->iorange);
1730}
1731
1732static void io_region_del(MemoryListener *listener,
1733                          MemoryRegionSection *section)
1734{
1735    isa_unassign_ioport(section->offset_within_address_space, section->size);
1736}
1737
1738static MemoryListener core_memory_listener = {
1739    .begin = core_begin,
1740    .log_global_start = core_log_global_start,
1741    .log_global_stop = core_log_global_stop,
1742    .priority = 1,
1743};
1744
1745static MemoryListener io_memory_listener = {
1746    .region_add = io_region_add,
1747    .region_del = io_region_del,
1748    .priority = 0,
1749};
1750
1751static MemoryListener tcg_memory_listener = {
1752    .commit = tcg_commit,
1753};
1754
1755void address_space_init_dispatch(AddressSpace *as)
1756{
1757    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1758
1759    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1760    d->listener = (MemoryListener) {
1761        .begin = mem_begin,
1762        .region_add = mem_add,
1763        .region_nop = mem_add,
1764        .priority = 0,
1765    };
1766    as->dispatch = d;
1767    memory_listener_register(&d->listener, as);
1768}
1769
1770void address_space_destroy_dispatch(AddressSpace *as)
1771{
1772    AddressSpaceDispatch *d = as->dispatch;
1773
1774    memory_listener_unregister(&d->listener);
1775    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1776    g_free(d);
1777    as->dispatch = NULL;
1778}
1779
1780static void memory_map_init(void)
1781{
1782    system_memory = g_malloc(sizeof(*system_memory));
1783    memory_region_init(system_memory, "system", INT64_MAX);
1784    address_space_init(&address_space_memory, system_memory);
1785    address_space_memory.name = "memory";
1786
1787    system_io = g_malloc(sizeof(*system_io));
1788    memory_region_init(system_io, "io", 65536);
1789    address_space_init(&address_space_io, system_io);
1790    address_space_io.name = "I/O";
1791
1792    memory_listener_register(&core_memory_listener, &address_space_memory);
1793    memory_listener_register(&io_memory_listener, &address_space_io);
1794    memory_listener_register(&tcg_memory_listener, &address_space_memory);
1795
1796    dma_context_init(&dma_context_memory, &address_space_memory,
1797                     NULL, NULL, NULL);
1798}
1799
1800MemoryRegion *get_system_memory(void)
1801{
1802    return system_memory;
1803}
1804
1805MemoryRegion *get_system_io(void)
1806{
1807    return system_io;
1808}
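
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * how board code typically uses get_system_memory() to install a RAM
 * region.  The name "my_board_ram" and the 64 MB size/offset are invented
 * for illustration only.
 */
#if 0
static void my_board_ram_init(void)
{
    MemoryRegion *ram = g_new(MemoryRegion, 1);

    /* Allocate 64 MB of RAM and map it at guest physical address 0. */
    memory_region_init_ram(ram, "my_board_ram", 64 * 1024 * 1024);
    vmstate_register_ram_global(ram);
    memory_region_add_subregion(get_system_memory(), 0, ram);
}
#endif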
1809
1810#endif /* !defined(CONFIG_USER_ONLY) */
1811
1812/* physical memory access (slow version, mainly for debug) */
1813#if defined(CONFIG_USER_ONLY)
1814int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1815                        uint8_t *buf, int len, int is_write)
1816{
1817    int l, flags;
1818    target_ulong page;
1819    void * p;
1820
1821    while (len > 0) {
1822        page = addr & TARGET_PAGE_MASK;
1823        l = (page + TARGET_PAGE_SIZE) - addr;
1824        if (l > len)
1825            l = len;
1826        flags = page_get_flags(page);
1827        if (!(flags & PAGE_VALID))
1828            return -1;
1829        if (is_write) {
1830            if (!(flags & PAGE_WRITE))
1831                return -1;
1832            /* XXX: this code should not depend on lock_user */
1833            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1834                return -1;
1835            memcpy(p, buf, l);
1836            unlock_user(p, addr, l);
1837        } else {
1838            if (!(flags & PAGE_READ))
1839                return -1;
1840            /* XXX: this code should not depend on lock_user */
1841            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1842                return -1;
1843            memcpy(buf, p, l);
1844            unlock_user(p, addr, 0);
1845        }
1846        len -= l;
1847        buf += l;
1848        addr += l;
1849    }
1850    return 0;
1851}
1852
1853#else
1854
1855static void invalidate_and_set_dirty(hwaddr addr,
1856                                     hwaddr length)
1857{
1858    if (!cpu_physical_memory_is_dirty(addr)) {
1859        /* invalidate code */
1860        tb_invalidate_phys_page_range(addr, addr + length, 0);
1861        /* set dirty bit */
1862        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1863    }
1864    xen_modified_memory(addr, length);
1865}
1866
1867void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1868                      int len, bool is_write)
1869{
1870    AddressSpaceDispatch *d = as->dispatch;
1871    int l;
1872    uint8_t *ptr;
1873    uint32_t val;
1874    hwaddr page;
1875    MemoryRegionSection *section;
1876
1877    while (len > 0) {
1878        page = addr & TARGET_PAGE_MASK;
1879        l = (page + TARGET_PAGE_SIZE) - addr;
1880        if (l > len)
1881            l = len;
1882        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1883
1884        if (is_write) {
1885            if (!memory_region_is_ram(section->mr)) {
1886                hwaddr addr1;
1887                addr1 = memory_region_section_addr(section, addr);
1888                /* XXX: could force cpu_single_env to NULL to avoid
1889                   potential bugs */
1890                if (l >= 4 && ((addr1 & 3) == 0)) {
1891                    /* 32 bit write access */
1892                    val = ldl_p(buf);
1893                    io_mem_write(section->mr, addr1, val, 4);
1894                    l = 4;
1895                } else if (l >= 2 && ((addr1 & 1) == 0)) {
1896                    /* 16 bit write access */
1897                    val = lduw_p(buf);
1898                    io_mem_write(section->mr, addr1, val, 2);
1899                    l = 2;
1900                } else {
1901                    /* 8 bit write access */
1902                    val = ldub_p(buf);
1903                    io_mem_write(section->mr, addr1, val, 1);
1904                    l = 1;
1905                }
1906            } else if (!section->readonly) {
1907                ram_addr_t addr1;
1908                addr1 = memory_region_get_ram_addr(section->mr)
1909                    + memory_region_section_addr(section, addr);
1910                /* RAM case */
1911                ptr = qemu_get_ram_ptr(addr1);
1912                memcpy(ptr, buf, l);
1913                invalidate_and_set_dirty(addr1, l);
1914                qemu_put_ram_ptr(ptr);
1915            }
1916        } else {
1917            if (!(memory_region_is_ram(section->mr) ||
1918                  memory_region_is_romd(section->mr))) {
1919                hwaddr addr1;
1920                /* I/O case */
1921                addr1 = memory_region_section_addr(section, addr);
1922                if (l >= 4 && ((addr1 & 3) == 0)) {
1923                    /* 32 bit read access */
1924                    val = io_mem_read(section->mr, addr1, 4);
1925                    stl_p(buf, val);
1926                    l = 4;
1927                } else if (l >= 2 && ((addr1 & 1) == 0)) {
1928                    /* 16 bit read access */
1929                    val = io_mem_read(section->mr, addr1, 2);
1930                    stw_p(buf, val);
1931                    l = 2;
1932                } else {
1933                    /* 8 bit read access */
1934                    val = io_mem_read(section->mr, addr1, 1);
1935                    stb_p(buf, val);
1936                    l = 1;
1937                }
1938            } else {
1939                /* RAM case */
1940                ptr = qemu_get_ram_ptr(section->mr->ram_addr
1941                                       + memory_region_section_addr(section,
1942                                                                    addr));
1943                memcpy(buf, ptr, l);
1944                qemu_put_ram_ptr(ptr);
1945            }
1946        }
1947        len -= l;
1948        buf += l;
1949        addr += l;
1950    }
1951}
1952
1953void address_space_write(AddressSpace *as, hwaddr addr,
1954                         const uint8_t *buf, int len)
1955{
1956    address_space_rw(as, addr, (uint8_t *)buf, len, true);
1957}
1958
1959/**
1960 * address_space_read: read from an address space.
1961 *
1962 * @as: #AddressSpace to be accessed
1963 * @addr: address within that address space
1964 * @buf: buffer with the data transferred
 * @len: length of the transfer, in bytes
1965 */
1966void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1967{
1968    address_space_rw(as, addr, buf, len, false);
1969}
1970
1971
1972void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1973                            int len, int is_write)
1974{
1975    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1976}
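
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * reading and writing guest physical memory through the slow-path helpers
 * above.  The address and buffer contents are arbitrary example values.
 */
#if 0
static void example_phys_access(void)
{
    uint8_t buf[16];
    hwaddr addr = 0x1000;

    /* Read 16 bytes from guest physical address 0x1000 ... */
    cpu_physical_memory_read(addr, buf, sizeof(buf));
    /* ... and write them back 16 bytes further on. */
    cpu_physical_memory_write(addr + sizeof(buf), buf, sizeof(buf));
}
#endif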
1977
1978/* used for ROM loading: can write in RAM and ROM */
1979void cpu_physical_memory_write_rom(hwaddr addr,
1980                                   const uint8_t *buf, int len)
1981{
1982    AddressSpaceDispatch *d = address_space_memory.dispatch;
1983    int l;
1984    uint8_t *ptr;
1985    hwaddr page;
1986    MemoryRegionSection *section;
1987
1988    while (len > 0) {
1989        page = addr & TARGET_PAGE_MASK;
1990        l = (page + TARGET_PAGE_SIZE) - addr;
1991        if (l > len)
1992            l = len;
1993        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1994
1995        if (!(memory_region_is_ram(section->mr) ||
1996              memory_region_is_romd(section->mr))) {
1997            /* do nothing */
1998        } else {
1999            unsigned long addr1;
2000            addr1 = memory_region_get_ram_addr(section->mr)
2001                + memory_region_section_addr(section, addr);
2002            /* ROM/RAM case */
2003            ptr = qemu_get_ram_ptr(addr1);
2004            memcpy(ptr, buf, l);
2005            invalidate_and_set_dirty(addr1, l);
2006            qemu_put_ram_ptr(ptr);
2007        }
2008        len -= l;
2009        buf += l;
2010        addr += l;
2011    }
2012}
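
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * a firmware loader might use cpu_physical_memory_write_rom() to place a
 * blob into a region that is read-only for the guest.  The load address
 * 0xfffc0000 is an invented example value.
 */
#if 0
static void example_load_firmware(const uint8_t *blob, int size)
{
    cpu_physical_memory_write_rom(0xfffc0000, blob, size);
}
#endif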
2013
2014typedef struct {
2015    void *buffer;
2016    hwaddr addr;
2017    hwaddr len;
2018} BounceBuffer;
2019
2020static BounceBuffer bounce;
2021
2022typedef struct MapClient {
2023    void *opaque;
2024    void (*callback)(void *opaque);
2025    QLIST_ENTRY(MapClient) link;
2026} MapClient;
2027
2028static QLIST_HEAD(map_client_list, MapClient) map_client_list
2029    = QLIST_HEAD_INITIALIZER(map_client_list);
2030
2031void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2032{
2033    MapClient *client = g_malloc(sizeof(*client));
2034
2035    client->opaque = opaque;
2036    client->callback = callback;
2037    QLIST_INSERT_HEAD(&map_client_list, client, link);
2038    return client;
2039}
2040
2041static void cpu_unregister_map_client(void *_client)
2042{
2043    MapClient *client = (MapClient *)_client;
2044
2045    QLIST_REMOVE(client, link);
2046    g_free(client);
2047}
2048
2049static void cpu_notify_map_clients(void)
2050{
2051    MapClient *client;
2052
2053    while (!QLIST_EMPTY(&map_client_list)) {
2054        client = QLIST_FIRST(&map_client_list);
2055        client->callback(client->opaque);
2056        cpu_unregister_map_client(client);
2057    }
2058}
2059
2060/* Map a physical memory region into a host virtual address.
2061 * May map a subset of the requested range, given by and returned in *plen.
2062 * May return NULL if resources needed to perform the mapping are exhausted.
2063 * Use only for reads OR writes - not for read-modify-write operations.
2064 * Use cpu_register_map_client() to know when retrying the map operation is
2065 * likely to succeed.
2066 */
2067void *address_space_map(AddressSpace *as,
2068                        hwaddr addr,
2069                        hwaddr *plen,
2070                        bool is_write)
2071{
2072    AddressSpaceDispatch *d = as->dispatch;
2073    hwaddr len = *plen;
2074    hwaddr todo = 0;
2075    int l;
2076    hwaddr page;
2077    MemoryRegionSection *section;
2078    ram_addr_t raddr = RAM_ADDR_MAX;
2079    ram_addr_t rlen;
2080    void *ret;
2081
2082    while (len > 0) {
2083        page = addr & TARGET_PAGE_MASK;
2084        l = (page + TARGET_PAGE_SIZE) - addr;
2085        if (l > len)
2086            l = len;
2087        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2088
2089        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2090            if (todo || bounce.buffer) {
2091                break;
2092            }
2093            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2094            bounce.addr = addr;
2095            bounce.len = l;
2096            if (!is_write) {
2097                address_space_read(as, addr, bounce.buffer, l);
2098            }
2099
2100            *plen = l;
2101            return bounce.buffer;
2102        }
2103        if (!todo) {
2104            raddr = memory_region_get_ram_addr(section->mr)
2105                + memory_region_section_addr(section, addr);
2106        }
2107
2108        len -= l;
2109        addr += l;
2110        todo += l;
2111    }
2112    rlen = todo;
2113    ret = qemu_ram_ptr_length(raddr, &rlen);
2114    *plen = rlen;
2115    return ret;
2116}
2117
2118/* Unmaps a memory region previously mapped by address_space_map().
2119 * Will also mark the memory as dirty if is_write == 1.  access_len gives
2120 * the amount of memory that was actually read or written by the caller.
2121 */
2122void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2123                         int is_write, hwaddr access_len)
2124{
2125    if (buffer != bounce.buffer) {
2126        if (is_write) {
2127            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2128            while (access_len) {
2129                unsigned l;
2130                l = TARGET_PAGE_SIZE;
2131                if (l > access_len)
2132                    l = access_len;
2133                invalidate_and_set_dirty(addr1, l);
2134                addr1 += l;
2135                access_len -= l;
2136            }
2137        }
2138        if (xen_enabled()) {
2139            xen_invalidate_map_cache_entry(buffer);
2140        }
2141        return;
2142    }
2143    if (is_write) {
2144        address_space_write(as, bounce.addr, bounce.buffer, access_len);
2145    }
2146    qemu_vfree(bounce.buffer);
2147    bounce.buffer = NULL;
2148    cpu_notify_map_clients();
2149}
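
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * the intended zero-copy pattern for address_space_map()/_unmap().  When
 * the single bounce buffer is busy the map can fail, in which case
 * cpu_register_map_client() arranges a callback for when a retry is likely
 * to succeed.  "retry_cb" and "example_dma_write" are invented names.
 */
#if 0
static void retry_cb(void *opaque)
{
    /* Called when a previously failed map is worth retrying. */
}

static void example_dma_write(AddressSpace *as, hwaddr addr,
                              const uint8_t *data, hwaddr size)
{
    hwaddr plen = size;
    void *host = address_space_map(as, addr, &plen, true);

    if (!host) {
        /* Resources exhausted: ask to be notified, then retry later. */
        cpu_register_map_client(NULL, retry_cb);
        return;
    }
    /* Only plen bytes were mapped; copy no more than that. */
    memcpy(host, data, plen);
    address_space_unmap(as, host, plen, true, plen);
}
#endif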
2150
2151void *cpu_physical_memory_map(hwaddr addr,
2152                              hwaddr *plen,
2153                              int is_write)
2154{
2155    return address_space_map(&address_space_memory, addr, plen, is_write);
2156}
2157
2158void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2159                               int is_write, hwaddr access_len)
2160{
2161    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2162}
2163
2164/* warning: addr must be aligned */
2165static inline uint32_t ldl_phys_internal(hwaddr addr,
2166                                         enum device_endian endian)
2167{
2168    uint8_t *ptr;
2169    uint32_t val;
2170    MemoryRegionSection *section;
2171
2172    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2173
2174    if (!(memory_region_is_ram(section->mr) ||
2175          memory_region_is_romd(section->mr))) {
2176        /* I/O case */
2177        addr = memory_region_section_addr(section, addr);
2178        val = io_mem_read(section->mr, addr, 4);
2179#if defined(TARGET_WORDS_BIGENDIAN)
2180        if (endian == DEVICE_LITTLE_ENDIAN) {
2181            val = bswap32(val);
2182        }
2183#else
2184        if (endian == DEVICE_BIG_ENDIAN) {
2185            val = bswap32(val);
2186        }
2187#endif
2188    } else {
2189        /* RAM case */
2190        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2191                                & TARGET_PAGE_MASK)
2192                               + memory_region_section_addr(section, addr));
2193        switch (endian) {
2194        case DEVICE_LITTLE_ENDIAN:
2195            val = ldl_le_p(ptr);
2196            break;
2197        case DEVICE_BIG_ENDIAN:
2198            val = ldl_be_p(ptr);
2199            break;
2200        default:
2201            val = ldl_p(ptr);
2202            break;
2203        }
2204    }
2205    return val;
2206}
2207
2208uint32_t ldl_phys(hwaddr addr)
2209{
2210    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2211}
2212
2213uint32_t ldl_le_phys(hwaddr addr)
2214{
2215    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2216}
2217
2218uint32_t ldl_be_phys(hwaddr addr)
2219{
2220    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2221}
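
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * the three flavours above differ only in how device endianness is
 * resolved.  Reading a 32-bit register of a device assumed to be
 * little-endian, at an arbitrary address passed by the caller:
 */
#if 0
static uint32_t example_read_le_reg(hwaddr reg_addr)
{
    /* ldl_le_phys byte-swaps as needed so the result is in host order;
       ldl_be_phys would be used for a big-endian device, and ldl_phys
       follows the target's native endianness. */
    return ldl_le_phys(reg_addr);
}
#endif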
2222
2223/* warning: addr must be aligned */
2224static inline uint64_t ldq_phys_internal(hwaddr addr,
2225                                         enum device_endian endian)
2226{
2227    uint8_t *ptr;
2228    uint64_t val;
2229    MemoryRegionSection *section;
2230
2231    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2232
2233    if (!(memory_region_is_ram(section->mr) ||
2234          memory_region_is_romd(section->mr))) {
2235        /* I/O case */
2236        addr = memory_region_section_addr(section, addr);
2237
2238        /* XXX This is broken when device endian != cpu endian.
2239               Fix and add "endian" variable check */
2240#ifdef TARGET_WORDS_BIGENDIAN
2241        val = io_mem_read(section->mr, addr, 4) << 32;
2242        val |= io_mem_read(section->mr, addr + 4, 4);
2243#else
2244        val = io_mem_read(section->mr, addr, 4);
2245        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2246#endif
2247    } else {
2248        /* RAM case */
2249        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2250                                & TARGET_PAGE_MASK)
2251                               + memory_region_section_addr(section, addr));
2252        switch (endian) {
2253        case DEVICE_LITTLE_ENDIAN:
2254            val = ldq_le_p(ptr);
2255            break;
2256        case DEVICE_BIG_ENDIAN:
2257            val = ldq_be_p(ptr);
2258            break;
2259        default:
2260            val = ldq_p(ptr);
2261            break;
2262        }
2263    }
2264    return val;
2265}
2266
2267uint64_t ldq_phys(hwaddr addr)
2268{
2269    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2270}
2271
2272uint64_t ldq_le_phys(hwaddr addr)
2273{
2274    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2275}
2276
2277uint64_t ldq_be_phys(hwaddr addr)
2278{
2279    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2280}
2281
2282/* XXX: optimize */
2283uint32_t ldub_phys(hwaddr addr)
2284{
2285    uint8_t val;
2286    cpu_physical_memory_read(addr, &val, 1);
2287    return val;
2288}
2289
2290/* warning: addr must be aligned */
2291static inline uint32_t lduw_phys_internal(hwaddr addr,
2292                                          enum device_endian endian)
2293{
2294    uint8_t *ptr;
2295    uint64_t val;
2296    MemoryRegionSection *section;
2297
2298    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2299
2300    if (!(memory_region_is_ram(section->mr) ||
2301          memory_region_is_romd(section->mr))) {
2302        /* I/O case */
2303        addr = memory_region_section_addr(section, addr);
2304        val = io_mem_read(section->mr, addr, 2);
2305#if defined(TARGET_WORDS_BIGENDIAN)
2306        if (endian == DEVICE_LITTLE_ENDIAN) {
2307            val = bswap16(val);
2308        }
2309#else
2310        if (endian == DEVICE_BIG_ENDIAN) {
2311            val = bswap16(val);
2312        }
2313#endif
2314    } else {
2315        /* RAM case */
2316        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2317                                & TARGET_PAGE_MASK)
2318                               + memory_region_section_addr(section, addr));
2319        switch (endian) {
2320        case DEVICE_LITTLE_ENDIAN:
2321            val = lduw_le_p(ptr);
2322            break;
2323        case DEVICE_BIG_ENDIAN:
2324            val = lduw_be_p(ptr);
2325            break;
2326        default:
2327            val = lduw_p(ptr);
2328            break;
2329        }
2330    }
2331    return val;
2332}
2333
2334uint32_t lduw_phys(hwaddr addr)
2335{
2336    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2337}
2338
2339uint32_t lduw_le_phys(hwaddr addr)
2340{
2341    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2342}
2343
2344uint32_t lduw_be_phys(hwaddr addr)
2345{
2346    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2347}
2348
2349/* warning: addr must be aligned.  The RAM page is not marked as dirty
2350   and the code inside is not invalidated.  This is useful if the dirty
2351   bits are used to track modified PTEs. */
2352void stl_phys_notdirty(hwaddr addr, uint32_t val)
2353{
2354    uint8_t *ptr;
2355    MemoryRegionSection *section;
2356
2357    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2358
2359    if (!memory_region_is_ram(section->mr) || section->readonly) {
2360        addr = memory_region_section_addr(section, addr);
2361        if (memory_region_is_ram(section->mr)) {
2362            section = &phys_sections[phys_section_rom];
2363        }
2364        io_mem_write(section->mr, addr, val, 4);
2365    } else {
2366        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2367                               & TARGET_PAGE_MASK)
2368            + memory_region_section_addr(section, addr);
2369        ptr = qemu_get_ram_ptr(addr1);
2370        stl_p(ptr, val);
2371
2372        if (unlikely(in_migration)) {
2373            if (!cpu_physical_memory_is_dirty(addr1)) {
2374                /* invalidate code */
2375                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2376                /* set dirty bit */
2377                cpu_physical_memory_set_dirty_flags(
2378                    addr1, (0xff & ~CODE_DIRTY_FLAG));
2379            }
2380        }
2381    }
2382}
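
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * an MMU helper setting accessed/dirty bits in a guest page-table entry.
 * Using stl_phys_notdirty() avoids flagging the page-table page as dirty
 * and avoids invalidating translated code on it.  The bit values below are
 * invented; a real target would use its own PTE layout.
 */
#if 0
static void example_update_pte(hwaddr pte_addr, uint32_t pte)
{
    pte |= 0x20 | 0x40;              /* hypothetical ACCESSED | DIRTY bits */
    stl_phys_notdirty(pte_addr, pte);
}
#endif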
2383
2384void stq_phys_notdirty(hwaddr addr, uint64_t val)
2385{
2386    uint8_t *ptr;
2387    MemoryRegionSection *section;
2388
2389    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2390
2391    if (!memory_region_is_ram(section->mr) || section->readonly) {
2392        addr = memory_region_section_addr(section, addr);
2393        if (memory_region_is_ram(section->mr)) {
2394            section = &phys_sections[phys_section_rom];
2395        }
2396#ifdef TARGET_WORDS_BIGENDIAN
2397        io_mem_write(section->mr, addr, val >> 32, 4);
2398        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2399#else
2400        io_mem_write(section->mr, addr, (uint32_t)val, 4);
2401        io_mem_write(section->mr, addr + 4, val >> 32, 4);
2402#endif
2403    } else {
2404        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2405                                & TARGET_PAGE_MASK)
2406                               + memory_region_section_addr(section, addr));
2407        stq_p(ptr, val);
2408    }
2409}
2410
2411/* warning: addr must be aligned */
2412static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2413                                     enum device_endian endian)
2414{
2415    uint8_t *ptr;
2416    MemoryRegionSection *section;
2417
2418    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2419
2420    if (!memory_region_is_ram(section->mr) || section->readonly) {
2421        addr = memory_region_section_addr(section, addr);
2422        if (memory_region_is_ram(section->mr)) {
2423            section = &phys_sections[phys_section_rom];
2424        }
2425#if defined(TARGET_WORDS_BIGENDIAN)
2426        if (endian == DEVICE_LITTLE_ENDIAN) {
2427            val = bswap32(val);
2428        }
2429#else
2430        if (endian == DEVICE_BIG_ENDIAN) {
2431            val = bswap32(val);
2432        }
2433#endif
2434        io_mem_write(section->mr, addr, val, 4);
2435    } else {
2436        unsigned long addr1;
2437        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2438            + memory_region_section_addr(section, addr);
2439        /* RAM case */
2440        ptr = qemu_get_ram_ptr(addr1);
2441        switch (endian) {
2442        case DEVICE_LITTLE_ENDIAN:
2443            stl_le_p(ptr, val);
2444            break;
2445        case DEVICE_BIG_ENDIAN:
2446            stl_be_p(ptr, val);
2447            break;
2448        default:
2449            stl_p(ptr, val);
2450            break;
2451        }
2452        invalidate_and_set_dirty(addr1, 4);
2453    }
2454}
2455
2456void stl_phys(hwaddr addr, uint32_t val)
2457{
2458    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2459}
2460
2461void stl_le_phys(hwaddr addr, uint32_t val)
2462{
2463    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2464}
2465
2466void stl_be_phys(hwaddr addr, uint32_t val)
2467{
2468    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2469}
2470
2471/* XXX: optimize */
2472void stb_phys(hwaddr addr, uint32_t val)
2473{
2474    uint8_t v = val;
2475    cpu_physical_memory_write(addr, &v, 1);
2476}
2477
2478/* warning: addr must be aligned */
2479static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2480                                     enum device_endian endian)
2481{
2482    uint8_t *ptr;
2483    MemoryRegionSection *section;
2484
2485    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2486
2487    if (!memory_region_is_ram(section->mr) || section->readonly) {
2488        addr = memory_region_section_addr(section, addr);
2489        if (memory_region_is_ram(section->mr)) {
2490            section = &phys_sections[phys_section_rom];
2491        }
2492#if defined(TARGET_WORDS_BIGENDIAN)
2493        if (endian == DEVICE_LITTLE_ENDIAN) {
2494            val = bswap16(val);
2495        }
2496#else
2497        if (endian == DEVICE_BIG_ENDIAN) {
2498            val = bswap16(val);
2499        }
2500#endif
2501        io_mem_write(section->mr, addr, val, 2);
2502    } else {
2503        unsigned long addr1;
2504        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2505            + memory_region_section_addr(section, addr);
2506        /* RAM case */
2507        ptr = qemu_get_ram_ptr(addr1);
2508        switch (endian) {
2509        case DEVICE_LITTLE_ENDIAN:
2510            stw_le_p(ptr, val);
2511            break;
2512        case DEVICE_BIG_ENDIAN:
2513            stw_be_p(ptr, val);
2514            break;
2515        default:
2516            stw_p(ptr, val);
2517            break;
2518        }
2519        invalidate_and_set_dirty(addr1, 2);
2520    }
2521}
2522
2523void stw_phys(hwaddr addr, uint32_t val)
2524{
2525    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2526}
2527
2528void stw_le_phys(hwaddr addr, uint32_t val)
2529{
2530    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2531}
2532
2533void stw_be_phys(hwaddr addr, uint32_t val)
2534{
2535    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2536}
2537
2538/* XXX: optimize */
2539void stq_phys(hwaddr addr, uint64_t val)
2540{
2541    val = tswap64(val);
2542    cpu_physical_memory_write(addr, &val, 8);
2543}
2544
2545void stq_le_phys(hwaddr addr, uint64_t val)
2546{
2547    val = cpu_to_le64(val);
2548    cpu_physical_memory_write(addr, &val, 8);
2549}
2550
2551void stq_be_phys(hwaddr addr, uint64_t val)
2552{
2553    val = cpu_to_be64(val);
2554    cpu_physical_memory_write(addr, &val, 8);
2555}
2556
2557/* virtual memory access for debug (includes writing to ROM) */
2558int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2559                        uint8_t *buf, int len, int is_write)
2560{
2561    int l;
2562    hwaddr phys_addr;
2563    target_ulong page;
2564
2565    while (len > 0) {
2566        page = addr & TARGET_PAGE_MASK;
2567        phys_addr = cpu_get_phys_page_debug(env, page);
2568        /* if no physical page is mapped, return an error */
2569        if (phys_addr == -1)
2570            return -1;
2571        l = (page + TARGET_PAGE_SIZE) - addr;
2572        if (l > len)
2573            l = len;
2574        phys_addr += (addr & ~TARGET_PAGE_MASK);
2575        if (is_write)
2576            cpu_physical_memory_write_rom(phys_addr, buf, l);
2577        else
2578            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2579        len -= l;
2580        buf += l;
2581        addr += l;
2582    }
2583    return 0;
2584}
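
/*
 * Illustrative usage sketch (kept under #if 0 so it is not compiled):
 * a debugger stub might read guest *virtual* memory with
 * cpu_memory_rw_debug(); the function walks the guest page tables via
 * cpu_get_phys_page_debug() and, for writes, goes through
 * cpu_physical_memory_write_rom() so breakpoints can land in ROM too.
 * "example_read_guest_virt" is an invented name.
 */
#if 0
static int example_read_guest_virt(CPUArchState *env, target_ulong vaddr,
                                   uint8_t *buf, int len)
{
    /* Returns 0 on success, -1 if any page in the range is unmapped. */
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0);
}
#endif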
2585#endif
2586
2587#if !defined(CONFIG_USER_ONLY)
2588
2589/*
2590 * A helper function for the _utterly broken_ virtio device model to find
2591 * out if it's running on a big-endian machine.  Don't do this at home, kids!
2592 */
2593bool virtio_is_big_endian(void);
2594bool virtio_is_big_endian(void)
2595{
2596#if defined(TARGET_WORDS_BIGENDIAN)
2597    return true;
2598#else
2599    return false;
2600#endif
2601}
2602
2603#endif
2604
2605#ifndef CONFIG_USER_ONLY
2606bool cpu_physical_memory_is_io(hwaddr phys_addr)
2607{
2608    MemoryRegionSection *section;
2609
2610    section = phys_page_find(address_space_memory.dispatch,
2611                             phys_addr >> TARGET_PAGE_BITS);
2612
2613    return !(memory_region_is_ram(section->mr) ||
2614             memory_region_is_romd(section->mr));
2615}
2616#endif
2617