qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifdef _WIN32
  21#include <windows.h>
  22#else
  23#include <sys/types.h>
  24#include <sys/mman.h>
  25#endif
  26
  27#include "qemu-common.h"
  28#include "cpu.h"
  29#include "tcg.h"
  30#include "hw/hw.h"
  31#include "hw/qdev.h"
  32#include "qemu/osdep.h"
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "hw/xen/xen.h"
  36#include "qemu/timer.h"
  37#include "qemu/config-file.h"
  38#include "exec/memory.h"
  39#include "sysemu/dma.h"
  40#include "exec/address-spaces.h"
  41#if defined(CONFIG_USER_ONLY)
  42#include <qemu.h>
  43#else /* !CONFIG_USER_ONLY */
  44#include "sysemu/xen-mapcache.h"
  45#include "trace.h"
  46#endif
  47#include "exec/cpu-all.h"
  48
  49#include "exec/cputlb.h"
  50#include "translate-all.h"
  51
  52#include "exec/memory-internal.h"
  53
  54//#define DEBUG_SUBPAGE
  55
  56#if !defined(CONFIG_USER_ONLY)
  57static int in_migration;
  58
  59RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
  60
  61static MemoryRegion *system_memory;
  62static MemoryRegion *system_io;
  63
  64AddressSpace address_space_io;
  65AddressSpace address_space_memory;
  66
  67MemoryRegion io_mem_rom, io_mem_notdirty;
  68static MemoryRegion io_mem_unassigned;
  69
  70#endif
  71
  72CPUState *first_cpu;
  73/* current CPU in the current thread. It is only valid inside
  74   cpu_exec() */
  75DEFINE_TLS(CPUState *, current_cpu);
  76/* 0 = Do not count executed instructions.
  77   1 = Precise instruction counting.
  78   2 = Adaptive rate instruction counting.  */
  79int use_icount;
  80
  81#if !defined(CONFIG_USER_ONLY)
  82
  83typedef struct PhysPageEntry PhysPageEntry;
  84
  85struct PhysPageEntry {
  86    uint16_t is_leaf : 1;
  87     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
  88    uint16_t ptr : 15;
  89};
  90
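/*
 * The physical page map is a radix tree over physical page numbers.  A
 * PhysPageEntry with is_leaf == 0 points (via ptr) at another Node in the
 * node table, or is PHYS_MAP_NODE_NIL while that subtree is unpopulated;
 * with is_leaf == 1, ptr indexes the MemoryRegionSection table instead.
 * Each Node below is one tree level and decodes L2_BITS of the page index.
 */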
  91typedef PhysPageEntry Node[L2_SIZE];
  92
  93struct AddressSpaceDispatch {
  94    /* This is a multi-level map on the physical address space.
  95     * The bottom level has pointers to MemoryRegionSections.
  96     */
  97    PhysPageEntry phys_map;
  98    Node *nodes;
  99    MemoryRegionSection *sections;
 100    AddressSpace *as;
 101};
 102
 103#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 104typedef struct subpage_t {
 105    MemoryRegion iomem;
 106    AddressSpace *as;
 107    hwaddr base;
 108    uint16_t sub_section[TARGET_PAGE_SIZE];
 109} subpage_t;
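/*
 * A subpage covers a single target page whose contents are shared by more
 * than one MemoryRegionSection.  sub_section[] holds, for every byte offset
 * within the page (SUBPAGE_IDX), the index of the section handling accesses
 * at that offset; subpage_register() fills a [start, end] range with one
 * section index.
 */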
 110
 111#define PHYS_SECTION_UNASSIGNED 0
 112#define PHYS_SECTION_NOTDIRTY 1
 113#define PHYS_SECTION_ROM 2
 114#define PHYS_SECTION_WATCH 3
 115
 116typedef struct PhysPageMap {
 117    unsigned sections_nb;
 118    unsigned sections_nb_alloc;
 119    unsigned nodes_nb;
 120    unsigned nodes_nb_alloc;
 121    Node *nodes;
 122    MemoryRegionSection *sections;
 123} PhysPageMap;
 124
 125static PhysPageMap *prev_map;
 126static PhysPageMap next_map;
 127
 128#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 129
 130static void io_mem_init(void);
 131static void memory_map_init(void);
 132static void *qemu_safe_ram_ptr(ram_addr_t addr);
 133
 134static MemoryRegion io_mem_watch;
 135#endif
 136
 137#if !defined(CONFIG_USER_ONLY)
 138
 139static void phys_map_node_reserve(unsigned nodes)
 140{
 141    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
 142        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
 143                                            16);
 144        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
 145                                      next_map.nodes_nb + nodes);
 146        next_map.nodes = g_renew(Node, next_map.nodes,
 147                                 next_map.nodes_nb_alloc);
 148    }
 149}
 150
 151static uint16_t phys_map_node_alloc(void)
 152{
 153    unsigned i;
 154    uint16_t ret;
 155
 156    ret = next_map.nodes_nb++;
 157    assert(ret != PHYS_MAP_NODE_NIL);
 158    assert(ret != next_map.nodes_nb_alloc);
 159    for (i = 0; i < L2_SIZE; ++i) {
 160        next_map.nodes[ret][i].is_leaf = 0;
 161        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
 162    }
 163    return ret;
 164}
 165
 166static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
 167                                hwaddr *nb, uint16_t leaf,
 168                                int level)
 169{
 170    PhysPageEntry *p;
 171    int i;
 172    hwaddr step = (hwaddr)1 << (level * L2_BITS);
 173
 174    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
 175        lp->ptr = phys_map_node_alloc();
 176        p = next_map.nodes[lp->ptr];
 177        if (level == 0) {
 178            for (i = 0; i < L2_SIZE; i++) {
 179                p[i].is_leaf = 1;
 180                p[i].ptr = PHYS_SECTION_UNASSIGNED;
 181            }
 182        }
 183    } else {
 184        p = next_map.nodes[lp->ptr];
 185    }
 186    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 187
 188    while (*nb && lp < &p[L2_SIZE]) {
 189        if ((*index & (step - 1)) == 0 && *nb >= step) {
 190            lp->is_leaf = true;
 191            lp->ptr = leaf;
 192            *index += step;
 193            *nb -= step;
 194        } else {
 195            phys_page_set_level(lp, index, nb, leaf, level - 1);
 196        }
 197        ++lp;
 198    }
 199}
 200
 201static void phys_page_set(AddressSpaceDispatch *d,
 202                          hwaddr index, hwaddr nb,
 203                          uint16_t leaf)
 204{
 205    /* Wildly overreserve - it doesn't matter much. */
 206    phys_map_node_reserve(3 * P_L2_LEVELS);
 207
 208    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 209}
 210
 211static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
 212                                           Node *nodes, MemoryRegionSection *sections)
 213{
 214    PhysPageEntry *p;
 215    int i;
 216
 217    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
 218        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 219            return &sections[PHYS_SECTION_UNASSIGNED];
 220        }
 221        p = nodes[lp.ptr];
 222        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
 223    }
 224    return &sections[lp.ptr];
 225}
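/*
 * Illustration of the walk above, assuming P_L2_LEVELS == 2 for the sake of
 * the example (the real depth depends on the target's physical address
 * width).  Not compiled; it just spells the loop out:
 *
 *   index = addr >> TARGET_PAGE_BITS;
 *   lp = d->phys_map;                                        // root, not a leaf
 *   lp = nodes[lp.ptr][(index >> L2_BITS) & (L2_SIZE - 1)];  // level 1
 *   lp = nodes[lp.ptr][index & (L2_SIZE - 1)];               // level 0, a leaf
 *   section = &sections[lp.ptr];
 *
 * Any PHYS_MAP_NODE_NIL pointer along the way short-circuits to the
 * PHYS_SECTION_UNASSIGNED section.
 */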
 226
 227bool memory_region_is_unassigned(MemoryRegion *mr)
 228{
 229    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 230        && mr != &io_mem_watch;
 231}
 232
 233static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 234                                                        hwaddr addr,
 235                                                        bool resolve_subpage)
 236{
 237    MemoryRegionSection *section;
 238    subpage_t *subpage;
 239
 240    section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
 241                             d->nodes, d->sections);
 242    if (resolve_subpage && section->mr->subpage) {
 243        subpage = container_of(section->mr, subpage_t, iomem);
 244        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 245    }
 246    return section;
 247}
 248
 249static MemoryRegionSection *
 250address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 251                                 hwaddr *plen, bool resolve_subpage)
 252{
 253    MemoryRegionSection *section;
 254    Int128 diff;
 255
 256    section = address_space_lookup_region(d, addr, resolve_subpage);
 257    /* Compute offset within MemoryRegionSection */
 258    addr -= section->offset_within_address_space;
 259
 260    /* Compute offset within MemoryRegion */
 261    *xlat = addr + section->offset_within_region;
 262
 263    diff = int128_sub(section->mr->size, int128_make64(addr));
 264    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 265    return section;
 266}
 267
 268MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 269                                      hwaddr *xlat, hwaddr *plen,
 270                                      bool is_write)
 271{
 272    IOMMUTLBEntry iotlb;
 273    MemoryRegionSection *section;
 274    MemoryRegion *mr;
 275    hwaddr len = *plen;
 276
 277    for (;;) {
 278        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
 279        mr = section->mr;
 280
 281        if (!mr->iommu_ops) {
 282            break;
 283        }
 284
 285        iotlb = mr->iommu_ops->translate(mr, addr);
 286        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 287                | (addr & iotlb.addr_mask));
 288        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
 289        if (!(iotlb.perm & (1 << is_write))) {
 290            mr = &io_mem_unassigned;
 291            break;
 292        }
 293
 294        as = iotlb.target_as;
 295    }
 296
 297    *plen = len;
 298    *xlat = addr;
 299    return mr;
 300}
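/*
 * address_space_translate() above resolves addr down to a terminal
 * MemoryRegion.  While the section found is an IOMMU region (mr->iommu_ops
 * is set), the IOMMU's translate callback is applied, the translated page is
 * combined with the offset bits kept by addr_mask, the length is clamped to
 * that IOMMU page, and the walk continues in iotlb.target_as.  If the IOMMU
 * entry does not permit the requested access, io_mem_unassigned is returned.
 */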
 301
 302MemoryRegionSection *
 303address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
 304                                  hwaddr *plen)
 305{
 306    MemoryRegionSection *section;
 307    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
 308
 309    assert(!section->mr->iommu_ops);
 310    return section;
 311}
 312#endif
 313
 314void cpu_exec_init_all(void)
 315{
 316#if !defined(CONFIG_USER_ONLY)
 317    qemu_mutex_init(&ram_list.mutex);
 318    memory_map_init();
 319    io_mem_init();
 320#endif
 321}
 322
 323#if !defined(CONFIG_USER_ONLY)
 324
 325static int cpu_common_post_load(void *opaque, int version_id)
 326{
 327    CPUState *cpu = opaque;
 328
 329    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 330       version_id is increased. */
 331    cpu->interrupt_request &= ~0x01;
 332    tlb_flush(cpu->env_ptr, 1);
 333
 334    return 0;
 335}
 336
 337const VMStateDescription vmstate_cpu_common = {
 338    .name = "cpu_common",
 339    .version_id = 1,
 340    .minimum_version_id = 1,
 341    .minimum_version_id_old = 1,
 342    .post_load = cpu_common_post_load,
 343    .fields      = (VMStateField []) {
 344        VMSTATE_UINT32(halted, CPUState),
 345        VMSTATE_UINT32(interrupt_request, CPUState),
 346        VMSTATE_END_OF_LIST()
 347    }
 348};
 349
 350#endif
 351
 352CPUState *qemu_get_cpu(int index)
 353{
 354    CPUState *cpu = first_cpu;
 355
 356    while (cpu) {
 357        if (cpu->cpu_index == index) {
 358            break;
 359        }
 360        cpu = cpu->next_cpu;
 361    }
 362
 363    return cpu;
 364}
 365
 366void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
 367{
 368    CPUState *cpu;
 369
 370    cpu = first_cpu;
 371    while (cpu) {
 372        func(cpu, data);
 373        cpu = cpu->next_cpu;
 374    }
 375}
 376
 377void cpu_exec_init(CPUArchState *env)
 378{
 379    CPUState *cpu = ENV_GET_CPU(env);
 380    CPUClass *cc = CPU_GET_CLASS(cpu);
 381    CPUState **pcpu;
 382    int cpu_index;
 383
 384#if defined(CONFIG_USER_ONLY)
 385    cpu_list_lock();
 386#endif
 387    cpu->next_cpu = NULL;
 388    pcpu = &first_cpu;
 389    cpu_index = 0;
 390    while (*pcpu != NULL) {
 391        pcpu = &(*pcpu)->next_cpu;
 392        cpu_index++;
 393    }
 394    cpu->cpu_index = cpu_index;
 395    cpu->numa_node = 0;
 396    QTAILQ_INIT(&env->breakpoints);
 397    QTAILQ_INIT(&env->watchpoints);
 398#ifndef CONFIG_USER_ONLY
 399    cpu->thread_id = qemu_get_thread_id();
 400#endif
 401    *pcpu = cpu;
 402#if defined(CONFIG_USER_ONLY)
 403    cpu_list_unlock();
 404#endif
 405    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 406        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 407    }
 408#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 409    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 410                    cpu_save, cpu_load, env);
 411    assert(cc->vmsd == NULL);
 412    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
 413#endif
 414    if (cc->vmsd != NULL) {
 415        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 416    }
 417}
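/*
 * CPUs live on the singly linked first_cpu list; cpu_exec_init() appends the
 * new CPU and derives cpu_index from its position in that list (holding
 * cpu_list_lock in user-mode builds).  Migration state is then registered
 * per CPU via vmstate_cpu_common, the optional cpu_save/cpu_load handlers,
 * and the CPU class's vmsd when one is provided.
 */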
 418
 419#if defined(TARGET_HAS_ICE)
 420#if defined(CONFIG_USER_ONLY)
 421static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 422{
 423    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 424}
 425#else
 426static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 427{
 428    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
 429    if (phys != -1) {
 430        tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
 431    }
 432}
 433#endif
 434#endif /* TARGET_HAS_ICE */
 435
 436#if defined(CONFIG_USER_ONLY)
 437void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
 438
 440}
 441
 442int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
 443                          int flags, CPUWatchpoint **watchpoint)
 444{
 445    return -ENOSYS;
 446}
 447#else
 448/* Add a watchpoint.  */
 449int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
 450                          int flags, CPUWatchpoint **watchpoint)
 451{
 452    target_ulong len_mask = ~(len - 1);
 453    CPUWatchpoint *wp;
 454
 455    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
 456    if ((len & (len - 1)) || (addr & ~len_mask) ||
 457            len == 0 || len > TARGET_PAGE_SIZE) {
 458        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
 459                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
 460        return -EINVAL;
 461    }
 462    wp = g_malloc(sizeof(*wp));
 463
 464    wp->vaddr = addr;
 465    wp->len_mask = len_mask;
 466    wp->flags = flags;
 467
 468    /* keep all GDB-injected watchpoints in front */
 469    if (flags & BP_GDB)
 470        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
 471    else
 472        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
 473
 474    tlb_flush_page(env, addr);
 475
 476    if (watchpoint)
 477        *watchpoint = wp;
 478    return 0;
 479}
 480
 481/* Remove a specific watchpoint.  */
 482int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
 483                          int flags)
 484{
 485    target_ulong len_mask = ~(len - 1);
 486    CPUWatchpoint *wp;
 487
 488    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
 489        if (addr == wp->vaddr && len_mask == wp->len_mask
 490                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 491            cpu_watchpoint_remove_by_ref(env, wp);
 492            return 0;
 493        }
 494    }
 495    return -ENOENT;
 496}
 497
 498/* Remove a specific watchpoint by reference.  */
 499void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
 500{
 501    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
 502
 503    tlb_flush_page(env, watchpoint->vaddr);
 504
 505    g_free(watchpoint);
 506}
 507
 508/* Remove all matching watchpoints.  */
 509void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
 510{
 511    CPUWatchpoint *wp, *next;
 512
 513    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
 514        if (wp->flags & mask)
 515            cpu_watchpoint_remove_by_ref(env, wp);
 516    }
 517}
 518#endif
 519
 520/* Add a breakpoint.  */
 521int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
 522                          CPUBreakpoint **breakpoint)
 523{
 524#if defined(TARGET_HAS_ICE)
 525    CPUBreakpoint *bp;
 526
 527    bp = g_malloc(sizeof(*bp));
 528
 529    bp->pc = pc;
 530    bp->flags = flags;
 531
 532    /* keep all GDB-injected breakpoints in front */
 533    if (flags & BP_GDB) {
 534        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
 535    } else {
 536        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
 537    }
 538
 539    breakpoint_invalidate(ENV_GET_CPU(env), pc);
 540
 541    if (breakpoint) {
 542        *breakpoint = bp;
 543    }
 544    return 0;
 545#else
 546    return -ENOSYS;
 547#endif
 548}
 549
 550/* Remove a specific breakpoint.  */
 551int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
 552{
 553#if defined(TARGET_HAS_ICE)
 554    CPUBreakpoint *bp;
 555
 556    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
 557        if (bp->pc == pc && bp->flags == flags) {
 558            cpu_breakpoint_remove_by_ref(env, bp);
 559            return 0;
 560        }
 561    }
 562    return -ENOENT;
 563#else
 564    return -ENOSYS;
 565#endif
 566}
 567
 568/* Remove a specific breakpoint by reference.  */
 569void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
 570{
 571#if defined(TARGET_HAS_ICE)
 572    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
 573
 574    breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
 575
 576    g_free(breakpoint);
 577#endif
 578}
 579
 580/* Remove all matching breakpoints. */
 581void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
 582{
 583#if defined(TARGET_HAS_ICE)
 584    CPUBreakpoint *bp, *next;
 585
 586    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
 587        if (bp->flags & mask)
 588            cpu_breakpoint_remove_by_ref(env, bp);
 589    }
 590#endif
 591}
 592
 593/* enable or disable single step mode. EXCP_DEBUG is returned by the
 594   CPU loop after each instruction */
 595void cpu_single_step(CPUState *cpu, int enabled)
 596{
 597#if defined(TARGET_HAS_ICE)
 598    if (cpu->singlestep_enabled != enabled) {
 599        cpu->singlestep_enabled = enabled;
 600        if (kvm_enabled()) {
 601            kvm_update_guest_debug(cpu, 0);
 602        } else {
 603            /* must flush all the translated code to avoid inconsistencies */
 604            /* XXX: only flush what is necessary */
 605            CPUArchState *env = cpu->env_ptr;
 606            tb_flush(env);
 607        }
 608    }
 609#endif
 610}
 611
 612void cpu_abort(CPUArchState *env, const char *fmt, ...)
 613{
 614    CPUState *cpu = ENV_GET_CPU(env);
 615    va_list ap;
 616    va_list ap2;
 617
 618    va_start(ap, fmt);
 619    va_copy(ap2, ap);
 620    fprintf(stderr, "qemu: fatal: ");
 621    vfprintf(stderr, fmt, ap);
 622    fprintf(stderr, "\n");
 623    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 624    if (qemu_log_enabled()) {
 625        qemu_log("qemu: fatal: ");
 626        qemu_log_vprintf(fmt, ap2);
 627        qemu_log("\n");
 628        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 629        qemu_log_flush();
 630        qemu_log_close();
 631    }
 632    va_end(ap2);
 633    va_end(ap);
 634#if defined(CONFIG_USER_ONLY)
 635    {
 636        struct sigaction act;
 637        sigfillset(&act.sa_mask);
 638        act.sa_handler = SIG_DFL;
 639        sigaction(SIGABRT, &act, NULL);
 640    }
 641#endif
 642    abort();
 643}
 644
 645CPUArchState *cpu_copy(CPUArchState *env)
 646{
 647    CPUArchState *new_env = cpu_init(env->cpu_model_str);
 648#if defined(TARGET_HAS_ICE)
 649    CPUBreakpoint *bp;
 650    CPUWatchpoint *wp;
 651#endif
 652
 653    /* Reset non arch specific state */
 654    cpu_reset(ENV_GET_CPU(new_env));
 655
 656    /* Copy arch specific state into the new CPU */
 657    memcpy(new_env, env, sizeof(CPUArchState));
 658
 659    /* Clone all break/watchpoints.
 660       Note: Once we support ptrace with hw-debug register access, make sure
 661       BP_CPU break/watchpoints are handled correctly on clone. */
 662    QTAILQ_INIT(&env->breakpoints);
 663    QTAILQ_INIT(&env->watchpoints);
 664#if defined(TARGET_HAS_ICE)
 665    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
 666        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
 667    }
 668    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
 669        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
 670                              wp->flags, NULL);
 671    }
 672#endif
 673
 674    return new_env;
 675}
 676
 677#if !defined(CONFIG_USER_ONLY)
 678static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
 679                                      uintptr_t length)
 680{
 681    uintptr_t start1;
 682
 683    /* we modify the TLB cache so that the dirty bit will be set again
 684       when accessing the range */
 685    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
 686    /* Check that we don't span multiple blocks - this breaks the
 687       address comparisons below.  */
 688    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
 689            != (end - 1) - start) {
 690        abort();
 691    }
 692    cpu_tlb_reset_dirty_all(start1, length);
 693
 694}
 695
 696/* Note: start and end must be within the same ram block.  */
 697void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
 698                                     int dirty_flags)
 699{
 700    uintptr_t length;
 701
 702    start &= TARGET_PAGE_MASK;
 703    end = TARGET_PAGE_ALIGN(end);
 704
 705    length = end - start;
 706    if (length == 0)
 707        return;
 708    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
 709
 710    if (tcg_enabled()) {
 711        tlb_reset_dirty_range_all(start, end, length);
 712    }
 713}
 714
 715static int cpu_physical_memory_set_dirty_tracking(int enable)
 716{
 717    int ret = 0;
 718    in_migration = enable;
 719    return ret;
 720}
 721
 722hwaddr memory_region_section_get_iotlb(CPUArchState *env,
 723                                       MemoryRegionSection *section,
 724                                       target_ulong vaddr,
 725                                       hwaddr paddr, hwaddr xlat,
 726                                       int prot,
 727                                       target_ulong *address)
 728{
 729    hwaddr iotlb;
 730    CPUWatchpoint *wp;
 731
 732    if (memory_region_is_ram(section->mr)) {
 733        /* Normal RAM.  */
 734        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
 735            + xlat;
 736        if (!section->readonly) {
 737            iotlb |= PHYS_SECTION_NOTDIRTY;
 738        } else {
 739            iotlb |= PHYS_SECTION_ROM;
 740        }
 741    } else {
 742        iotlb = section - address_space_memory.dispatch->sections;
 743        iotlb += xlat;
 744    }
 745
 746    /* Make accesses to pages with watchpoints go via the
 747       watchpoint trap routines.  */
 748    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
 749        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
 750            /* Avoid trapping reads of pages with a write breakpoint. */
 751            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
 752                iotlb = PHYS_SECTION_WATCH + paddr;
 753                *address |= TLB_MMIO;
 754                break;
 755            }
 756        }
 757    }
 758
 759    return iotlb;
 760}
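/*
 * The value returned above is what the TLB code stores as the iotlb entry:
 * for RAM it is the page-aligned ram_addr plus xlat, with
 * PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM OR-ed into the low bits; for
 * everything else it is the section's index in address_space_memory's
 * dispatch plus xlat, which iotlb_to_region() later maps back to a
 * MemoryRegion.  Pages with a watchpoint that can affect the access are
 * redirected to PHYS_SECTION_WATCH and flagged TLB_MMIO so accesses trap.
 */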
 761#endif /* defined(CONFIG_USER_ONLY) */
 762
 763#if !defined(CONFIG_USER_ONLY)
 764
 765static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
 766                             uint16_t section);
 767static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
 768
 769static uint16_t phys_section_add(MemoryRegionSection *section)
 770{
 771    /* The physical section number is ORed with a page-aligned
 772     * pointer to produce the iotlb entries.  Thus it should
 773     * never overflow into the page-aligned value.
 774     */
 775    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
 776
 777    if (next_map.sections_nb == next_map.sections_nb_alloc) {
 778        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
 779                                         16);
 780        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
 781                                    next_map.sections_nb_alloc);
 782    }
 783    next_map.sections[next_map.sections_nb] = *section;
 784    memory_region_ref(section->mr);
 785    return next_map.sections_nb++;
 786}
 787
 788static void phys_section_destroy(MemoryRegion *mr)
 789{
 790    memory_region_unref(mr);
 791
 792    if (mr->subpage) {
 793        subpage_t *subpage = container_of(mr, subpage_t, iomem);
 794        memory_region_destroy(&subpage->iomem);
 795        g_free(subpage);
 796    }
 797}
 798
 799static void phys_sections_free(PhysPageMap *map)
 800{
 801    while (map->sections_nb > 0) {
 802        MemoryRegionSection *section = &map->sections[--map->sections_nb];
 803        phys_section_destroy(section->mr);
 804    }
 805    g_free(map->sections);
 806    g_free(map->nodes);
 807    g_free(map);
 808}
 809
 810static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 811{
 812    subpage_t *subpage;
 813    hwaddr base = section->offset_within_address_space
 814        & TARGET_PAGE_MASK;
 815    MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
 816                                                   next_map.nodes, next_map.sections);
 817    MemoryRegionSection subsection = {
 818        .offset_within_address_space = base,
 819        .size = int128_make64(TARGET_PAGE_SIZE),
 820    };
 821    hwaddr start, end;
 822
 823    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
 824
 825    if (!(existing->mr->subpage)) {
 826        subpage = subpage_init(d->as, base);
 827        subsection.mr = &subpage->iomem;
 828        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
 829                      phys_section_add(&subsection));
 830    } else {
 831        subpage = container_of(existing->mr, subpage_t, iomem);
 832    }
 833    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
 834    end = start + int128_get64(section->size) - 1;
 835    subpage_register(subpage, start, end, phys_section_add(section));
 836}
 837
 838
 839static void register_multipage(AddressSpaceDispatch *d,
 840                               MemoryRegionSection *section)
 841{
 842    hwaddr start_addr = section->offset_within_address_space;
 843    uint16_t section_index = phys_section_add(section);
 844    uint64_t num_pages = int128_get64(int128_rshift(section->size,
 845                                                    TARGET_PAGE_BITS));
 846
 847    assert(num_pages);
 848    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
 849}
 850
 851static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 852{
 853    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
 854    AddressSpaceDispatch *d = as->next_dispatch;
 855    MemoryRegionSection now = *section, remain = *section;
 856    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
 857
 858    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
 859        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
 860                       - now.offset_within_address_space;
 861
 862        now.size = int128_min(int128_make64(left), now.size);
 863        register_subpage(d, &now);
 864    } else {
 865        now.size = int128_zero();
 866    }
 867    while (int128_ne(remain.size, now.size)) {
 868        remain.size = int128_sub(remain.size, now.size);
 869        remain.offset_within_address_space += int128_get64(now.size);
 870        remain.offset_within_region += int128_get64(now.size);
 871        now = remain;
 872        if (int128_lt(remain.size, page_size)) {
 873            register_subpage(d, &now);
 874        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
 875            now.size = page_size;
 876            register_subpage(d, &now);
 877        } else {
 878            now.size = int128_and(now.size, int128_neg(page_size));
 879            register_multipage(d, &now);
 880        }
 881    }
 882}
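/*
 * mem_add() splits each incoming section on target page boundaries: pieces
 * shorter than a page, or not page-aligned (including an unaligned head),
 * are routed through register_subpage(), while aligned runs of whole pages,
 * with the size masked down to a page multiple, go through
 * register_multipage().
 */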
 883
 884void qemu_flush_coalesced_mmio_buffer(void)
 885{
 886    if (kvm_enabled())
 887        kvm_flush_coalesced_mmio_buffer();
 888}
 889
 890void qemu_mutex_lock_ramlist(void)
 891{
 892    qemu_mutex_lock(&ram_list.mutex);
 893}
 894
 895void qemu_mutex_unlock_ramlist(void)
 896{
 897    qemu_mutex_unlock(&ram_list.mutex);
 898}
 899
 900#if defined(__linux__) && !defined(TARGET_S390X)
 901
 902#include <sys/vfs.h>
 903
 904#define HUGETLBFS_MAGIC       0x958458f6
 905
 906static long gethugepagesize(const char *path)
 907{
 908    struct statfs fs;
 909    int ret;
 910
 911    do {
 912        ret = statfs(path, &fs);
 913    } while (ret != 0 && errno == EINTR);
 914
 915    if (ret != 0) {
 916        perror(path);
 917        return 0;
 918    }
 919
 920    if (fs.f_type != HUGETLBFS_MAGIC)
 921        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
 922
 923    return fs.f_bsize;
 924}
 925
 926static void *file_ram_alloc(RAMBlock *block,
 927                            ram_addr_t memory,
 928                            const char *path)
 929{
 930    char *filename;
 931    char *sanitized_name;
 932    char *c;
 933    void *area;
 934    int fd;
 935#ifdef MAP_POPULATE
 936    int flags;
 937#endif
 938    unsigned long hpagesize;
 939
 940    hpagesize = gethugepagesize(path);
 941    if (!hpagesize) {
 942        return NULL;
 943    }
 944
 945    if (memory < hpagesize) {
 946        return NULL;
 947    }
 948
 949    if (kvm_enabled() && !kvm_has_sync_mmu()) {
 950        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
 951        return NULL;
 952    }
 953
 954    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
 955    sanitized_name = g_strdup(block->mr->name);
 956    for (c = sanitized_name; *c != '\0'; c++) {
 957        if (*c == '/')
 958            *c = '_';
 959    }
 960
 961    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
 962                               sanitized_name);
 963    g_free(sanitized_name);
 964
 965    fd = mkstemp(filename);
 966    if (fd < 0) {
 967        perror("unable to create backing store for hugepages");
 968        g_free(filename);
 969        return NULL;
 970    }
 971    unlink(filename);
 972    g_free(filename);
 973
 974    memory = (memory+hpagesize-1) & ~(hpagesize-1);
 975
 976    /*
 977     * ftruncate is not supported by hugetlbfs in older
 978     * hosts, so don't bother bailing out on errors.
 979     * If anything goes wrong with it under other filesystems,
 980     * mmap will fail.
 981     */
 982    if (ftruncate(fd, memory))
 983        perror("ftruncate");
 984
 985#ifdef MAP_POPULATE
  986    /* NB: MAP_POPULATE won't exhaustively allocate all phys pages when
  987     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
 988     * to sidestep this quirk.
 989     */
 990    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
 991    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
 992#else
 993    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 994#endif
 995    if (area == MAP_FAILED) {
 996        perror("file_ram_alloc: can't mmap RAM pages");
 997        close(fd);
 998        return (NULL);
 999    }
1000    block->fd = fd;
1001    return area;
1002}
1003#endif
1004
1005static ram_addr_t find_ram_offset(ram_addr_t size)
1006{
1007    RAMBlock *block, *next_block;
1008    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1009
 1010    assert(size != 0); /* it would hand out the same offset multiple times */
1011
1012    if (QTAILQ_EMPTY(&ram_list.blocks))
1013        return 0;
1014
1015    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1016        ram_addr_t end, next = RAM_ADDR_MAX;
1017
1018        end = block->offset + block->length;
1019
1020        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1021            if (next_block->offset >= end) {
1022                next = MIN(next, next_block->offset);
1023            }
1024        }
1025        if (next - end >= size && next - end < mingap) {
1026            offset = end;
1027            mingap = next - end;
1028        }
1029    }
1030
1031    if (offset == RAM_ADDR_MAX) {
1032        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1033                (uint64_t)size);
1034        abort();
1035    }
1036
1037    return offset;
1038}
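/*
 * find_ram_offset() is a best-fit search: for every existing block it finds
 * the closest block that starts at or after this block's end, and keeps the
 * smallest such gap that can still hold the requested size.  It aborts if no
 * gap fits.
 */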
1039
1040ram_addr_t last_ram_offset(void)
1041{
1042    RAMBlock *block;
1043    ram_addr_t last = 0;
1044
1045    QTAILQ_FOREACH(block, &ram_list.blocks, next)
1046        last = MAX(last, block->offset + block->length);
1047
1048    return last;
1049}
1050
1051static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1052{
1053    int ret;
1054
 1055    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
1056    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1057                           "dump-guest-core", true)) {
1058        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1059        if (ret) {
1060            perror("qemu_madvise");
1061            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1062                            "but dump_guest_core=off specified\n");
1063        }
1064    }
1065}
1066
1067void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1068{
1069    RAMBlock *new_block, *block;
1070
1071    new_block = NULL;
1072    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1073        if (block->offset == addr) {
1074            new_block = block;
1075            break;
1076        }
1077    }
1078    assert(new_block);
1079    assert(!new_block->idstr[0]);
1080
1081    if (dev) {
1082        char *id = qdev_get_dev_path(dev);
1083        if (id) {
1084            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1085            g_free(id);
1086        }
1087    }
1088    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1089
1090    /* This assumes the iothread lock is taken here too.  */
1091    qemu_mutex_lock_ramlist();
1092    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1093        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1094            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1095                    new_block->idstr);
1096            abort();
1097        }
1098    }
1099    qemu_mutex_unlock_ramlist();
1100}
1101
1102static int memory_try_enable_merging(void *addr, size_t len)
1103{
1104    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1105        /* disabled by the user */
1106        return 0;
1107    }
1108
1109    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1110}
1111
1112ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1113                                   MemoryRegion *mr)
1114{
1115    RAMBlock *block, *new_block;
1116
1117    size = TARGET_PAGE_ALIGN(size);
1118    new_block = g_malloc0(sizeof(*new_block));
1119
1120    /* This assumes the iothread lock is taken here too.  */
1121    qemu_mutex_lock_ramlist();
1122    new_block->mr = mr;
1123    new_block->offset = find_ram_offset(size);
1124    if (host) {
1125        new_block->host = host;
1126        new_block->flags |= RAM_PREALLOC_MASK;
1127    } else {
1128        if (mem_path) {
1129#if defined (__linux__) && !defined(TARGET_S390X)
1130            new_block->host = file_ram_alloc(new_block, size, mem_path);
1131            if (!new_block->host) {
1132                new_block->host = qemu_anon_ram_alloc(size);
1133                memory_try_enable_merging(new_block->host, size);
1134            }
1135#else
1136            fprintf(stderr, "-mem-path option unsupported\n");
1137            exit(1);
1138#endif
1139        } else {
1140            if (xen_enabled()) {
1141                xen_ram_alloc(new_block->offset, size, mr);
1142            } else if (kvm_enabled()) {
1143                /* some s390/kvm configurations have special constraints */
1144                new_block->host = kvm_ram_alloc(size);
1145            } else {
1146                new_block->host = qemu_anon_ram_alloc(size);
1147            }
1148            memory_try_enable_merging(new_block->host, size);
1149        }
1150    }
1151    new_block->length = size;
1152
1153    /* Keep the list sorted from biggest to smallest block.  */
1154    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1155        if (block->length < new_block->length) {
1156            break;
1157        }
1158    }
1159    if (block) {
1160        QTAILQ_INSERT_BEFORE(block, new_block, next);
1161    } else {
1162        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1163    }
1164    ram_list.mru_block = NULL;
1165
1166    ram_list.version++;
1167    qemu_mutex_unlock_ramlist();
1168
1169    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1170                                       last_ram_offset() >> TARGET_PAGE_BITS);
1171    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1172           0, size >> TARGET_PAGE_BITS);
1173    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1174
1175    qemu_ram_setup_dump(new_block->host, size);
1176    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1177    qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1178
1179    if (kvm_enabled())
1180        kvm_setup_guest_memory(new_block->host, size);
1181
1182    return new_block->offset;
1183}
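/*
 * Allocation policy above, in order: a caller-provided host pointer is used
 * as-is (RAM_PREALLOC_MASK); with -mem-path the block is backed by a
 * hugetlbfs file, falling back to an anonymous mapping on failure; otherwise
 * Xen, KVM (for hosts with special constraints) or a plain anonymous mapping
 * provides the memory.  The block list stays sorted largest-first, and the
 * new range's dirty bitmap bits are set to 0xff.
 */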
1184
1185ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1186{
1187    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1188}
1189
1190void qemu_ram_free_from_ptr(ram_addr_t addr)
1191{
1192    RAMBlock *block;
1193
1194    /* This assumes the iothread lock is taken here too.  */
1195    qemu_mutex_lock_ramlist();
1196    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1197        if (addr == block->offset) {
1198            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1199            ram_list.mru_block = NULL;
1200            ram_list.version++;
1201            g_free(block);
1202            break;
1203        }
1204    }
1205    qemu_mutex_unlock_ramlist();
1206}
1207
1208void qemu_ram_free(ram_addr_t addr)
1209{
1210    RAMBlock *block;
1211
1212    /* This assumes the iothread lock is taken here too.  */
1213    qemu_mutex_lock_ramlist();
1214    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1215        if (addr == block->offset) {
1216            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1217            ram_list.mru_block = NULL;
1218            ram_list.version++;
1219            if (block->flags & RAM_PREALLOC_MASK) {
1220                ;
1221            } else if (mem_path) {
1222#if defined (__linux__) && !defined(TARGET_S390X)
1223                if (block->fd) {
1224                    munmap(block->host, block->length);
1225                    close(block->fd);
1226                } else {
1227                    qemu_anon_ram_free(block->host, block->length);
1228                }
1229#else
1230                abort();
1231#endif
1232            } else {
1233                if (xen_enabled()) {
1234                    xen_invalidate_map_cache_entry(block->host);
1235                } else {
1236                    qemu_anon_ram_free(block->host, block->length);
1237                }
1238            }
1239            g_free(block);
1240            break;
1241        }
1242    }
1243    qemu_mutex_unlock_ramlist();
1244
1245}
1246
1247#ifndef _WIN32
1248void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1249{
1250    RAMBlock *block;
1251    ram_addr_t offset;
1252    int flags;
1253    void *area, *vaddr;
1254
1255    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1256        offset = addr - block->offset;
1257        if (offset < block->length) {
1258            vaddr = block->host + offset;
1259            if (block->flags & RAM_PREALLOC_MASK) {
1260                ;
1261            } else {
1262                flags = MAP_FIXED;
1263                munmap(vaddr, length);
1264                if (mem_path) {
1265#if defined(__linux__) && !defined(TARGET_S390X)
1266                    if (block->fd) {
1267#ifdef MAP_POPULATE
1268                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1269                            MAP_PRIVATE;
1270#else
1271                        flags |= MAP_PRIVATE;
1272#endif
1273                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1274                                    flags, block->fd, offset);
1275                    } else {
1276                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1277                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1278                                    flags, -1, 0);
1279                    }
1280#else
1281                    abort();
1282#endif
1283                } else {
1284#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1285                    flags |= MAP_SHARED | MAP_ANONYMOUS;
1286                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1287                                flags, -1, 0);
1288#else
1289                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1290                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1291                                flags, -1, 0);
1292#endif
1293                }
1294                if (area != vaddr) {
1295                    fprintf(stderr, "Could not remap addr: "
1296                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1297                            length, addr);
1298                    exit(1);
1299                }
1300                memory_try_enable_merging(vaddr, length);
1301                qemu_ram_setup_dump(vaddr, length);
1302            }
1303            return;
1304        }
1305    }
1306}
1307#endif /* !_WIN32 */
1308
1309static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
1310{
1311    RAMBlock *block;
1312
1313    /* The list is protected by the iothread lock here.  */
1314    block = ram_list.mru_block;
1315    if (block && addr - block->offset < block->length) {
1316        goto found;
1317    }
1318    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1319        if (addr - block->offset < block->length) {
1320            goto found;
1321        }
1322    }
1323
1324    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1325    abort();
1326
1327found:
1328    ram_list.mru_block = block;
1329    return block;
1330}
1331
1332/* Return a host pointer to ram allocated with qemu_ram_alloc.
1333   With the exception of the softmmu code in this file, this should
1334   only be used for local memory (e.g. video ram) that the device owns,
1335   and knows it isn't going to access beyond the end of the block.
1336
1337   It should not be used for general purpose DMA.
1338   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1339 */
1340void *qemu_get_ram_ptr(ram_addr_t addr)
1341{
1342    RAMBlock *block = qemu_get_ram_block(addr);
1343
1344    if (xen_enabled()) {
1345        /* We need to check if the requested address is in the RAM
1346         * because we don't want to map the entire memory in QEMU.
1347         * In that case just map until the end of the page.
1348         */
1349        if (block->offset == 0) {
1350            return xen_map_cache(addr, 0, 0);
1351        } else if (block->host == NULL) {
1352            block->host =
1353                xen_map_cache(block->offset, block->length, 1);
1354        }
1355    }
1356    return block->host + (addr - block->offset);
1357}
1358
1359/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1360 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1361 *
1362 * ??? Is this still necessary?
1363 */
1364static void *qemu_safe_ram_ptr(ram_addr_t addr)
1365{
1366    RAMBlock *block;
1367
1368    /* The list is protected by the iothread lock here.  */
1369    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1370        if (addr - block->offset < block->length) {
1371            if (xen_enabled()) {
1372                /* We need to check if the requested address is in the RAM
1373                 * because we don't want to map the entire memory in QEMU.
1374                 * In that case just map until the end of the page.
1375                 */
1376                if (block->offset == 0) {
1377                    return xen_map_cache(addr, 0, 0);
1378                } else if (block->host == NULL) {
1379                    block->host =
1380                        xen_map_cache(block->offset, block->length, 1);
1381                }
1382            }
1383            return block->host + (addr - block->offset);
1384        }
1385    }
1386
1387    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1388    abort();
1389
1390    return NULL;
1391}
1392
1393/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1394 * but takes a size argument */
1395static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1396{
1397    if (*size == 0) {
1398        return NULL;
1399    }
1400    if (xen_enabled()) {
1401        return xen_map_cache(addr, *size, 1);
1402    } else {
1403        RAMBlock *block;
1404
1405        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1406            if (addr - block->offset < block->length) {
1407                if (addr - block->offset + *size > block->length)
1408                    *size = block->length - addr + block->offset;
1409                return block->host + (addr - block->offset);
1410            }
1411        }
1412
1413        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1414        abort();
1415    }
1416}
1417
1418/* Some of the softmmu routines need to translate from a host pointer
1419   (typically a TLB entry) back to a ram offset.  */
1420MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1421{
1422    RAMBlock *block;
1423    uint8_t *host = ptr;
1424
1425    if (xen_enabled()) {
1426        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1427        return qemu_get_ram_block(*ram_addr)->mr;
1428    }
1429
1430    block = ram_list.mru_block;
1431    if (block && block->host && host - block->host < block->length) {
1432        goto found;
1433    }
1434
1435    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 1436        /* This case happens when the block is not mapped. */
1437        if (block->host == NULL) {
1438            continue;
1439        }
1440        if (host - block->host < block->length) {
1441            goto found;
1442        }
1443    }
1444
1445    return NULL;
1446
1447found:
1448    *ram_addr = block->offset + (host - block->host);
1449    return block->mr;
1450}
1451
1452static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1453                               uint64_t val, unsigned size)
1454{
1455    int dirty_flags;
1456    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1457    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1458        tb_invalidate_phys_page_fast(ram_addr, size);
1459        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1460    }
1461    switch (size) {
1462    case 1:
1463        stb_p(qemu_get_ram_ptr(ram_addr), val);
1464        break;
1465    case 2:
1466        stw_p(qemu_get_ram_ptr(ram_addr), val);
1467        break;
1468    case 4:
1469        stl_p(qemu_get_ram_ptr(ram_addr), val);
1470        break;
1471    default:
1472        abort();
1473    }
1474    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1475    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1476    /* we remove the notdirty callback only if the code has been
1477       flushed */
1478    if (dirty_flags == 0xff) {
1479        CPUArchState *env = current_cpu->env_ptr;
1480        tlb_set_dirty(env, env->mem_io_vaddr);
1481    }
1482}
1483
1484static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1485                                 unsigned size, bool is_write)
1486{
1487    return is_write;
1488}
1489
1490static const MemoryRegionOps notdirty_mem_ops = {
1491    .write = notdirty_mem_write,
1492    .valid.accepts = notdirty_mem_accepts,
1493    .endianness = DEVICE_NATIVE_ENDIAN,
1494};
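/*
 * notdirty_mem_ops only accepts writes (see notdirty_mem_accepts); they are
 * routed here while a page's CODE_DIRTY_FLAG is clear.  The handler
 * invalidates any translated code for the page, performs the store on the
 * RAM backing, updates the dirty flags and, once the page is fully dirty
 * (0xff), calls tlb_set_dirty() so later writes take the fast path again.
 */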
1495
1496/* Generate a debug exception if a watchpoint has been hit.  */
1497static void check_watchpoint(int offset, int len_mask, int flags)
1498{
1499    CPUArchState *env = current_cpu->env_ptr;
1500    target_ulong pc, cs_base;
1501    target_ulong vaddr;
1502    CPUWatchpoint *wp;
1503    int cpu_flags;
1504
1505    if (env->watchpoint_hit) {
1506        /* We re-entered the check after replacing the TB. Now raise
 1507         * the debug interrupt so that it will trigger after the
1508         * current instruction. */
1509        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1510        return;
1511    }
1512    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1513    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1514        if ((vaddr == (wp->vaddr & len_mask) ||
1515             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1516            wp->flags |= BP_WATCHPOINT_HIT;
1517            if (!env->watchpoint_hit) {
1518                env->watchpoint_hit = wp;
1519                tb_check_watchpoint(env);
1520                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1521                    env->exception_index = EXCP_DEBUG;
1522                    cpu_loop_exit(env);
1523                } else {
1524                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1525                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1526                    cpu_resume_from_signal(env, NULL);
1527                }
1528            }
1529        } else {
1530            wp->flags &= ~BP_WATCHPOINT_HIT;
1531        }
1532    }
1533}
1534
1535/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1536   so these check for a hit then pass through to the normal out-of-line
1537   phys routines.  */
1538static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1539                               unsigned size)
1540{
1541    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1542    switch (size) {
1543    case 1: return ldub_phys(addr);
1544    case 2: return lduw_phys(addr);
1545    case 4: return ldl_phys(addr);
1546    default: abort();
1547    }
1548}
1549
1550static void watch_mem_write(void *opaque, hwaddr addr,
1551                            uint64_t val, unsigned size)
1552{
1553    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1554    switch (size) {
1555    case 1:
1556        stb_phys(addr, val);
1557        break;
1558    case 2:
1559        stw_phys(addr, val);
1560        break;
1561    case 4:
1562        stl_phys(addr, val);
1563        break;
1564    default: abort();
1565    }
1566}
1567
1568static const MemoryRegionOps watch_mem_ops = {
1569    .read = watch_mem_read,
1570    .write = watch_mem_write,
1571    .endianness = DEVICE_NATIVE_ENDIAN,
1572};
1573
1574static uint64_t subpage_read(void *opaque, hwaddr addr,
1575                             unsigned len)
1576{
1577    subpage_t *subpage = opaque;
1578    uint8_t buf[4];
1579
1580#if defined(DEBUG_SUBPAGE)
1581    printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1582           subpage, len, addr);
1583#endif
1584    address_space_read(subpage->as, addr + subpage->base, buf, len);
1585    switch (len) {
1586    case 1:
1587        return ldub_p(buf);
1588    case 2:
1589        return lduw_p(buf);
1590    case 4:
1591        return ldl_p(buf);
1592    default:
1593        abort();
1594    }
1595}
1596
1597static void subpage_write(void *opaque, hwaddr addr,
1598                          uint64_t value, unsigned len)
1599{
1600    subpage_t *subpage = opaque;
1601    uint8_t buf[4];
1602
1603#if defined(DEBUG_SUBPAGE)
1604    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1605           " value %"PRIx64"\n",
1606           __func__, subpage, len, addr, value);
1607#endif
1608    switch (len) {
1609    case 1:
1610        stb_p(buf, value);
1611        break;
1612    case 2:
1613        stw_p(buf, value);
1614        break;
1615    case 4:
1616        stl_p(buf, value);
1617        break;
1618    default:
1619        abort();
1620    }
1621    address_space_write(subpage->as, addr + subpage->base, buf, len);
1622}
1623
1624static bool subpage_accepts(void *opaque, hwaddr addr,
1625                            unsigned size, bool is_write)
1626{
1627    subpage_t *subpage = opaque;
1628#if defined(DEBUG_SUBPAGE)
 1629    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
 1630           __func__, subpage, is_write ? 'w' : 'r', size, addr);
1631#endif
1632
1633    return address_space_access_valid(subpage->as, addr + subpage->base,
1634                                      size, is_write);
1635}
1636
1637static const MemoryRegionOps subpage_ops = {
1638    .read = subpage_read,
1639    .write = subpage_write,
1640    .valid.accepts = subpage_accepts,
1641    .endianness = DEVICE_NATIVE_ENDIAN,
1642};
1643
1644static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1645                             uint16_t section)
1646{
1647    int idx, eidx;
1648
1649    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1650        return -1;
1651    idx = SUBPAGE_IDX(start);
1652    eidx = SUBPAGE_IDX(end);
1653#if defined(DEBUG_SUBPAGE)
 1654    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
 1655           mmio, start, end, idx, eidx, section);
1656#endif
1657    for (; idx <= eidx; idx++) {
1658        mmio->sub_section[idx] = section;
1659    }
1660
1661    return 0;
1662}
1663
1664static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1665{
1666    subpage_t *mmio;
1667
1668    mmio = g_malloc0(sizeof(subpage_t));
1669
1670    mmio->as = as;
1671    mmio->base = base;
1672    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1673                          "subpage", TARGET_PAGE_SIZE);
1674    mmio->iomem.subpage = true;
1675#if defined(DEBUG_SUBPAGE)
 1676    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
 1677           mmio, base, TARGET_PAGE_SIZE);
1678#endif
1679    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1680
1681    return mmio;
1682}
1683
1684static uint16_t dummy_section(MemoryRegion *mr)
1685{
1686    MemoryRegionSection section = {
1687        .mr = mr,
1688        .offset_within_address_space = 0,
1689        .offset_within_region = 0,
1690        .size = int128_2_64(),
1691    };
1692
1693    return phys_section_add(&section);
1694}
1695
1696MemoryRegion *iotlb_to_region(hwaddr index)
1697{
1698    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
1699}
1700
1701static void io_mem_init(void)
1702{
1703    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1704    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1705                          "unassigned", UINT64_MAX);
1706    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1707                          "notdirty", UINT64_MAX);
1708    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1709                          "watch", UINT64_MAX);
1710}
1711
1712static void mem_begin(MemoryListener *listener)
1713{
1714    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1715    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1716
1717    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1718    d->as = as;
1719    as->next_dispatch = d;
1720}
1721
1722static void mem_commit(MemoryListener *listener)
1723{
1724    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1725    AddressSpaceDispatch *cur = as->dispatch;
1726    AddressSpaceDispatch *next = as->next_dispatch;
1727
1728    next->nodes = next_map.nodes;
1729    next->sections = next_map.sections;
1730
1731    as->dispatch = next;
1732    g_free(cur);
1733}
1734
1735static void core_begin(MemoryListener *listener)
1736{
1737    uint16_t n;
1738
1739    prev_map = g_new(PhysPageMap, 1);
1740    *prev_map = next_map;
1741
1742    memset(&next_map, 0, sizeof(next_map));
1743    n = dummy_section(&io_mem_unassigned);
1744    assert(n == PHYS_SECTION_UNASSIGNED);
1745    n = dummy_section(&io_mem_notdirty);
1746    assert(n == PHYS_SECTION_NOTDIRTY);
1747    n = dummy_section(&io_mem_rom);
1748    assert(n == PHYS_SECTION_ROM);
1749    n = dummy_section(&io_mem_watch);
1750    assert(n == PHYS_SECTION_WATCH);
1751}
1752
1753/* This listener's commit runs after the other AddressSpaceDispatch listeners'.
1754 * All AddressSpaceDispatch instances have switched to the next map.
1755 */
1756static void core_commit(MemoryListener *listener)
1757{
1758    phys_sections_free(prev_map);
1759}
1760
1761static void tcg_commit(MemoryListener *listener)
1762{
1763    CPUState *cpu;
1764
1765    /* Since each CPU stores RAM addresses in its TLB cache, we must
1766       reset the modified entries.  */
1767    /* XXX: slow! */
1768    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1769        CPUArchState *env = cpu->env_ptr;
1770
1771        tlb_flush(env, 1);
1772    }
1773}
1774
1775static void core_log_global_start(MemoryListener *listener)
1776{
1777    cpu_physical_memory_set_dirty_tracking(1);
1778}
1779
1780static void core_log_global_stop(MemoryListener *listener)
1781{
1782    cpu_physical_memory_set_dirty_tracking(0);
1783}
1784
1785static MemoryListener core_memory_listener = {
1786    .begin = core_begin,
1787    .commit = core_commit,
1788    .log_global_start = core_log_global_start,
1789    .log_global_stop = core_log_global_stop,
1790    .priority = 1,
1791};
1792
1793static MemoryListener tcg_memory_listener = {
1794    .commit = tcg_commit,
1795};
1796
1797void address_space_init_dispatch(AddressSpace *as)
1798{
1799    as->dispatch = NULL;
1800    as->dispatch_listener = (MemoryListener) {
1801        .begin = mem_begin,
1802        .commit = mem_commit,
1803        .region_add = mem_add,
1804        .region_nop = mem_add,
1805        .priority = 0,
1806    };
1807    memory_listener_register(&as->dispatch_listener, as);
1808}
1809
1810void address_space_destroy_dispatch(AddressSpace *as)
1811{
1812    AddressSpaceDispatch *d = as->dispatch;
1813
1814    memory_listener_unregister(&as->dispatch_listener);
1815    g_free(d);
1816    as->dispatch = NULL;
1817}
1818
1819static void memory_map_init(void)
1820{
1821    system_memory = g_malloc(sizeof(*system_memory));
1822    memory_region_init(system_memory, NULL, "system", INT64_MAX);
1823    address_space_init(&address_space_memory, system_memory, "memory");
1824
1825    system_io = g_malloc(sizeof(*system_io));
1826    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1827                          65536);
1828    address_space_init(&address_space_io, system_io, "I/O");
1829
1830    memory_listener_register(&core_memory_listener, &address_space_memory);
1831    memory_listener_register(&tcg_memory_listener, &address_space_memory);
1832}
1833
1834MemoryRegion *get_system_memory(void)
1835{
1836    return system_memory;
1837}
1838
1839MemoryRegion *get_system_io(void)
1840{
1841    return system_io;
1842}
1843
1844#endif /* !defined(CONFIG_USER_ONLY) */
1845
1846/* physical memory access (slow version, mainly for debug) */
1847#if defined(CONFIG_USER_ONLY)
1848int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1849                        uint8_t *buf, int len, int is_write)
1850{
1851    int l, flags;
1852    target_ulong page;
1853    void * p;
1854
1855    while (len > 0) {
1856        page = addr & TARGET_PAGE_MASK;
1857        l = (page + TARGET_PAGE_SIZE) - addr;
1858        if (l > len)
1859            l = len;
1860        flags = page_get_flags(page);
1861        if (!(flags & PAGE_VALID))
1862            return -1;
1863        if (is_write) {
1864            if (!(flags & PAGE_WRITE))
1865                return -1;
1866            /* XXX: this code should not depend on lock_user */
1867            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1868                return -1;
1869            memcpy(p, buf, l);
1870            unlock_user(p, addr, l);
1871        } else {
1872            if (!(flags & PAGE_READ))
1873                return -1;
1874            /* XXX: this code should not depend on lock_user */
1875            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1876                return -1;
1877            memcpy(buf, p, l);
1878            unlock_user(p, addr, 0);
1879        }
1880        len -= l;
1881        buf += l;
1882        addr += l;
1883    }
1884    return 0;
1885}
1886
1887#else
1888
1889static void invalidate_and_set_dirty(hwaddr addr,
1890                                     hwaddr length)
1891{
1892    if (!cpu_physical_memory_is_dirty(addr)) {
1893        /* invalidate code */
1894        tb_invalidate_phys_page_range(addr, addr + length, 0);
1895        /* set dirty bit */
1896        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1897    }
1898    xen_modified_memory(addr, length);
1899}
1900
1901static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1902{
1903    if (memory_region_is_ram(mr)) {
1904        return !(is_write && mr->readonly);
1905    }
1906    if (memory_region_is_romd(mr)) {
1907        return !is_write;
1908    }
1909
1910    return false;
1911}
1912
1913static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1914{
1915    unsigned access_size_max = mr->ops->valid.max_access_size;
1916
1917    /* Regions are assumed to support 1-4 byte accesses unless
1918       otherwise specified.  */
1919    if (access_size_max == 0) {
1920        access_size_max = 4;
1921    }
1922
1923    /* Bound the maximum access by the alignment of the address.  */
1924    if (!mr->ops->impl.unaligned) {
1925        unsigned align_size_max = addr & -addr;
1926        if (align_size_max != 0 && align_size_max < access_size_max) {
1927            access_size_max = align_size_max;
1928        }
1929    }
1930
1931    /* Don't attempt accesses larger than the maximum.  */
1932    if (l > access_size_max) {
1933        l = access_size_max;
1934    }
1935    if (l & (l - 1)) {
1936        l = 1 << (qemu_fls(l) - 1);
1937    }
1938
1939    return l;
1940}
1941
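/* Worked example (illustrative): for a region whose ops set
 * valid.max_access_size = 4 and leave impl.unaligned false, a request of
 * l = 8 bytes at addr = 0x1002 is trimmed as follows: access_size_max
 * starts at 4; the alignment of 0x1002 (addr & -addr) is 2, so
 * access_size_max drops to 2; l is then clamped to 2, which is already a
 * power of two.  The caller loops and issues the remaining bytes as
 * further accesses.
 */
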
1942bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1943                      int len, bool is_write)
1944{
1945    hwaddr l;
1946    uint8_t *ptr;
1947    uint64_t val;
1948    hwaddr addr1;
1949    MemoryRegion *mr;
1950    bool error = false;
1951
1952    while (len > 0) {
1953        l = len;
1954        mr = address_space_translate(as, addr, &addr1, &l, is_write);
1955
1956        if (is_write) {
1957            if (!memory_access_is_direct(mr, is_write)) {
1958                l = memory_access_size(mr, l, addr1);
1959                /* XXX: could force current_cpu to NULL to avoid
1960                   potential bugs */
1961                switch (l) {
1962                case 8:
1963                    /* 64 bit write access */
1964                    val = ldq_p(buf);
1965                    error |= io_mem_write(mr, addr1, val, 8);
1966                    break;
1967                case 4:
1968                    /* 32 bit write access */
1969                    val = ldl_p(buf);
1970                    error |= io_mem_write(mr, addr1, val, 4);
1971                    break;
1972                case 2:
1973                    /* 16 bit write access */
1974                    val = lduw_p(buf);
1975                    error |= io_mem_write(mr, addr1, val, 2);
1976                    break;
1977                case 1:
1978                    /* 8 bit write access */
1979                    val = ldub_p(buf);
1980                    error |= io_mem_write(mr, addr1, val, 1);
1981                    break;
1982                default:
1983                    abort();
1984                }
1985            } else {
1986                addr1 += memory_region_get_ram_addr(mr);
1987                /* RAM case */
1988                ptr = qemu_get_ram_ptr(addr1);
1989                memcpy(ptr, buf, l);
1990                invalidate_and_set_dirty(addr1, l);
1991            }
1992        } else {
1993            if (!memory_access_is_direct(mr, is_write)) {
1994                /* I/O case */
1995                l = memory_access_size(mr, l, addr1);
1996                switch (l) {
1997                case 8:
1998                    /* 64 bit read access */
1999                    error |= io_mem_read(mr, addr1, &val, 8);
2000                    stq_p(buf, val);
2001                    break;
2002                case 4:
2003                    /* 32 bit read access */
2004                    error |= io_mem_read(mr, addr1, &val, 4);
2005                    stl_p(buf, val);
2006                    break;
2007                case 2:
2008                    /* 16 bit read access */
2009                    error |= io_mem_read(mr, addr1, &val, 2);
2010                    stw_p(buf, val);
2011                    break;
2012                case 1:
2013                    /* 8 bit read access */
2014                    error |= io_mem_read(mr, addr1, &val, 1);
2015                    stb_p(buf, val);
2016                    break;
2017                default:
2018                    abort();
2019                }
2020            } else {
2021                /* RAM case */
2022                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2023                memcpy(buf, ptr, l);
2024            }
2025        }
2026        len -= l;
2027        buf += l;
2028        addr += l;
2029    }
2030
2031    return error;
2032}
2033
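/* Usage sketch (illustrative, not part of the build): reading a 32-bit
 * device register through the system memory address space.  REG_ADDR is a
 * hypothetical guest physical address.
 *
 *     uint8_t buf[4];
 *     bool err = address_space_rw(&address_space_memory, REG_ADDR,
 *                                 buf, sizeof(buf), false);
 *     if (!err) {
 *         uint32_t val = ldl_p(buf);   // interpret buf in guest byte order
 *     }
 *
 * A true return value means at least part of the access hit an unassigned
 * or rejecting region.
 */
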
2034bool address_space_write(AddressSpace *as, hwaddr addr,
2035                         const uint8_t *buf, int len)
2036{
2037    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2038}
2039
2040bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2041{
2042    return address_space_rw(as, addr, buf, len, false);
2043}
2044
2046void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2047                            int len, int is_write)
2048{
2049    address_space_rw(&address_space_memory, addr, buf, len, is_write);
2050}
2051
2052/* used for ROM loading: can write in RAM and ROM */
2053void cpu_physical_memory_write_rom(hwaddr addr,
2054                                   const uint8_t *buf, int len)
2055{
2056    hwaddr l;
2057    uint8_t *ptr;
2058    hwaddr addr1;
2059    MemoryRegion *mr;
2060
2061    while (len > 0) {
2062        l = len;
2063        mr = address_space_translate(&address_space_memory,
2064                                     addr, &addr1, &l, true);
2065
2066        if (!(memory_region_is_ram(mr) ||
2067              memory_region_is_romd(mr))) {
2068            /* do nothing */
2069        } else {
2070            addr1 += memory_region_get_ram_addr(mr);
2071            /* ROM/RAM case */
2072            ptr = qemu_get_ram_ptr(addr1);
2073            memcpy(ptr, buf, l);
2074            invalidate_and_set_dirty(addr1, l);
2075        }
2076        len -= l;
2077        buf += l;
2078        addr += l;
2079    }
2080}
2081
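/* Usage sketch (illustrative): ROM/firmware loading code copies an image
 * with this helper because normal writes are not allowed to modify memory
 * the guest sees as ROM.  BIOS_BASE, blob and blob_size are hypothetical.
 *
 *     cpu_physical_memory_write_rom(BIOS_BASE, blob, blob_size);
 */
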
2082typedef struct {
2083    MemoryRegion *mr;
2084    void *buffer;
2085    hwaddr addr;
2086    hwaddr len;
2087} BounceBuffer;
2088
2089static BounceBuffer bounce;
2090
2091typedef struct MapClient {
2092    void *opaque;
2093    void (*callback)(void *opaque);
2094    QLIST_ENTRY(MapClient) link;
2095} MapClient;
2096
2097static QLIST_HEAD(map_client_list, MapClient) map_client_list
2098    = QLIST_HEAD_INITIALIZER(map_client_list);
2099
2100void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2101{
2102    MapClient *client = g_malloc(sizeof(*client));
2103
2104    client->opaque = opaque;
2105    client->callback = callback;
2106    QLIST_INSERT_HEAD(&map_client_list, client, link);
2107    return client;
2108}
2109
2110static void cpu_unregister_map_client(void *_client)
2111{
2112    MapClient *client = (MapClient *)_client;
2113
2114    QLIST_REMOVE(client, link);
2115    g_free(client);
2116}
2117
2118static void cpu_notify_map_clients(void)
2119{
2120    MapClient *client;
2121
2122    while (!QLIST_EMPTY(&map_client_list)) {
2123        client = QLIST_FIRST(&map_client_list);
2124        client->callback(client->opaque);
2125        cpu_unregister_map_client(client);
2126    }
2127}
2128
2129bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2130{
2131    MemoryRegion *mr;
2132    hwaddr l, xlat;
2133
2134    while (len > 0) {
2135        l = len;
2136        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2137        if (!memory_access_is_direct(mr, is_write)) {
2138            l = memory_access_size(mr, l, addr);
2139            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2140                return false;
2141            }
2142        }
2143
2144        len -= l;
2145        addr += l;
2146    }
2147    return true;
2148}
2149
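/* Usage sketch (illustrative): a device model can check that a guest
 * supplied DMA window is fully accessible before starting a transfer.
 * "desc" is a hypothetical device-specific descriptor.
 *
 *     if (!address_space_access_valid(&address_space_memory,
 *                                     desc.addr, desc.len, true)) {
 *         // flag a DMA error to the guest instead of writing
 *     }
 */
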
2150/* Map a physical memory region into a host virtual address.
2151 * May map a subset of the requested range, given by and returned in *plen.
2152 * May return NULL if resources needed to perform the mapping are exhausted.
2153 * Use only for reads OR writes - not for read-modify-write operations.
2154 * Use cpu_register_map_client() to know when retrying the map operation is
2155 * likely to succeed.
2156 */
2157void *address_space_map(AddressSpace *as,
2158                        hwaddr addr,
2159                        hwaddr *plen,
2160                        bool is_write)
2161{
2162    hwaddr len = *plen;
2163    hwaddr done = 0;
2164    hwaddr l, xlat, base;
2165    MemoryRegion *mr, *this_mr;
2166    ram_addr_t raddr;
2167
2168    if (len == 0) {
2169        return NULL;
2170    }
2171
2172    l = len;
2173    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2174    if (!memory_access_is_direct(mr, is_write)) {
2175        if (bounce.buffer) {
2176            return NULL;
2177        }
2178        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2179        bounce.addr = addr;
2180        bounce.len = l;
2181
2182        memory_region_ref(mr);
2183        bounce.mr = mr;
2184        if (!is_write) {
2185            address_space_read(as, addr, bounce.buffer, l);
2186        }
2187
2188        *plen = l;
2189        return bounce.buffer;
2190    }
2191
2192    base = xlat;
2193    raddr = memory_region_get_ram_addr(mr);
2194
2195    for (;;) {
2196        len -= l;
2197        addr += l;
2198        done += l;
2199        if (len == 0) {
2200            break;
2201        }
2202
2203        l = len;
2204        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2205        if (this_mr != mr || xlat != base + done) {
2206            break;
2207        }
2208    }
2209
2210    memory_region_ref(mr);
2211    *plen = done;
2212    return qemu_ram_ptr_length(raddr + base, plen);
2213}
2214
2215/* Unmaps a memory region previously mapped by address_space_map().
2216 * Will also mark the memory as dirty if is_write == 1.  access_len gives
2217 * the amount of memory that was actually read or written by the caller.
2218 */
2219void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2220                         int is_write, hwaddr access_len)
2221{
2222    if (buffer != bounce.buffer) {
2223        MemoryRegion *mr;
2224        ram_addr_t addr1;
2225
2226        mr = qemu_ram_addr_from_host(buffer, &addr1);
2227        assert(mr != NULL);
2228        if (is_write) {
2229            while (access_len) {
2230                unsigned l;
2231                l = TARGET_PAGE_SIZE;
2232                if (l > access_len)
2233                    l = access_len;
2234                invalidate_and_set_dirty(addr1, l);
2235                addr1 += l;
2236                access_len -= l;
2237            }
2238        }
2239        if (xen_enabled()) {
2240            xen_invalidate_map_cache_entry(buffer);
2241        }
2242        memory_region_unref(mr);
2243        return;
2244    }
2245    if (is_write) {
2246        address_space_write(as, bounce.addr, bounce.buffer, access_len);
2247    }
2248    qemu_vfree(bounce.buffer);
2249    bounce.buffer = NULL;
2250    memory_region_unref(bounce.mr);
2251    cpu_notify_map_clients();
2252}
2253
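/* Usage sketch (illustrative): the map/access/unmap pattern.  When the
 * translation hits I/O memory while the single bounce buffer is in use,
 * address_space_map() returns NULL; the caller may register a map client
 * to be told when a retry is worthwhile.  "retry_cb" and "dev" are
 * hypothetical.
 *
 *     hwaddr len = size;
 *     void *p = address_space_map(&address_space_memory, gpa, &len, true);
 *     if (!p) {
 *         cpu_register_map_client(dev, retry_cb);
 *         return;
 *     }
 *     // fill at most len bytes at p; len may be smaller than size
 *     address_space_unmap(&address_space_memory, p, len, true, len);
 */
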
2254void *cpu_physical_memory_map(hwaddr addr,
2255                              hwaddr *plen,
2256                              int is_write)
2257{
2258    return address_space_map(&address_space_memory, addr, plen, is_write);
2259}
2260
2261void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2262                               int is_write, hwaddr access_len)
2263{
2264    address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2265}
2266
2267/* warning: addr must be aligned */
2268static inline uint32_t ldl_phys_internal(hwaddr addr,
2269                                         enum device_endian endian)
2270{
2271    uint8_t *ptr;
2272    uint64_t val;
2273    MemoryRegion *mr;
2274    hwaddr l = 4;
2275    hwaddr addr1;
2276
2277    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2278                                 false);
2279    if (l < 4 || !memory_access_is_direct(mr, false)) {
2280        /* I/O case */
2281        io_mem_read(mr, addr1, &val, 4);
2282#if defined(TARGET_WORDS_BIGENDIAN)
2283        if (endian == DEVICE_LITTLE_ENDIAN) {
2284            val = bswap32(val);
2285        }
2286#else
2287        if (endian == DEVICE_BIG_ENDIAN) {
2288            val = bswap32(val);
2289        }
2290#endif
2291    } else {
2292        /* RAM case */
2293        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2294                                & TARGET_PAGE_MASK)
2295                               + addr1);
2296        switch (endian) {
2297        case DEVICE_LITTLE_ENDIAN:
2298            val = ldl_le_p(ptr);
2299            break;
2300        case DEVICE_BIG_ENDIAN:
2301            val = ldl_be_p(ptr);
2302            break;
2303        default:
2304            val = ldl_p(ptr);
2305            break;
2306        }
2307    }
2308    return val;
2309}
2310
2311uint32_t ldl_phys(hwaddr addr)
2312{
2313    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2314}
2315
2316uint32_t ldl_le_phys(hwaddr addr)
2317{
2318    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2319}
2320
2321uint32_t ldl_be_phys(hwaddr addr)
2322{
2323    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2324}
2325
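/* Worked example (illustrative): if RAM at physical address A holds the
 * bytes 0x78 0x56 0x34 0x12 (lowest address first), then
 *     ldl_le_phys(A) == 0x12345678
 *     ldl_be_phys(A) == 0x78563412
 * and ldl_phys(A) returns whichever of the two matches the target's
 * native byte order.
 */
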
2326/* warning: addr must be aligned */
2327static inline uint64_t ldq_phys_internal(hwaddr addr,
2328                                         enum device_endian endian)
2329{
2330    uint8_t *ptr;
2331    uint64_t val;
2332    MemoryRegion *mr;
2333    hwaddr l = 8;
2334    hwaddr addr1;
2335
2336    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2337                                 false);
2338    if (l < 8 || !memory_access_is_direct(mr, false)) {
2339        /* I/O case */
2340        io_mem_read(mr, addr1, &val, 8);
2341#if defined(TARGET_WORDS_BIGENDIAN)
2342        if (endian == DEVICE_LITTLE_ENDIAN) {
2343            val = bswap64(val);
2344        }
2345#else
2346        if (endian == DEVICE_BIG_ENDIAN) {
2347            val = bswap64(val);
2348        }
2349#endif
2350    } else {
2351        /* RAM case */
2352        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2353                                & TARGET_PAGE_MASK)
2354                               + addr1);
2355        switch (endian) {
2356        case DEVICE_LITTLE_ENDIAN:
2357            val = ldq_le_p(ptr);
2358            break;
2359        case DEVICE_BIG_ENDIAN:
2360            val = ldq_be_p(ptr);
2361            break;
2362        default:
2363            val = ldq_p(ptr);
2364            break;
2365        }
2366    }
2367    return val;
2368}
2369
2370uint64_t ldq_phys(hwaddr addr)
2371{
2372    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2373}
2374
2375uint64_t ldq_le_phys(hwaddr addr)
2376{
2377    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2378}
2379
2380uint64_t ldq_be_phys(hwaddr addr)
2381{
2382    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2383}
2384
2385/* XXX: optimize */
2386uint32_t ldub_phys(hwaddr addr)
2387{
2388    uint8_t val;
2389    cpu_physical_memory_read(addr, &val, 1);
2390    return val;
2391}
2392
2393/* warning: addr must be aligned */
2394static inline uint32_t lduw_phys_internal(hwaddr addr,
2395                                          enum device_endian endian)
2396{
2397    uint8_t *ptr;
2398    uint64_t val;
2399    MemoryRegion *mr;
2400    hwaddr l = 2;
2401    hwaddr addr1;
2402
2403    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2404                                 false);
2405    if (l < 2 || !memory_access_is_direct(mr, false)) {
2406        /* I/O case */
2407        io_mem_read(mr, addr1, &val, 2);
2408#if defined(TARGET_WORDS_BIGENDIAN)
2409        if (endian == DEVICE_LITTLE_ENDIAN) {
2410            val = bswap16(val);
2411        }
2412#else
2413        if (endian == DEVICE_BIG_ENDIAN) {
2414            val = bswap16(val);
2415        }
2416#endif
2417    } else {
2418        /* RAM case */
2419        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2420                                & TARGET_PAGE_MASK)
2421                               + addr1);
2422        switch (endian) {
2423        case DEVICE_LITTLE_ENDIAN:
2424            val = lduw_le_p(ptr);
2425            break;
2426        case DEVICE_BIG_ENDIAN:
2427            val = lduw_be_p(ptr);
2428            break;
2429        default:
2430            val = lduw_p(ptr);
2431            break;
2432        }
2433    }
2434    return val;
2435}
2436
2437uint32_t lduw_phys(hwaddr addr)
2438{
2439    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2440}
2441
2442uint32_t lduw_le_phys(hwaddr addr)
2443{
2444    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2445}
2446
2447uint32_t lduw_be_phys(hwaddr addr)
2448{
2449    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2450}
2451
2452/* warning: addr must be aligned.  The RAM page is not marked as dirty
2453   and the code inside is not invalidated.  It is useful if the dirty
2454   bits are used to track modified PTEs. */
2455void stl_phys_notdirty(hwaddr addr, uint32_t val)
2456{
2457    uint8_t *ptr;
2458    MemoryRegion *mr;
2459    hwaddr l = 4;
2460    hwaddr addr1;
2461
2462    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2463                                 true);
2464    if (l < 4 || !memory_access_is_direct(mr, true)) {
2465        io_mem_write(mr, addr1, val, 4);
2466    } else {
2467        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2468        ptr = qemu_get_ram_ptr(addr1);
2469        stl_p(ptr, val);
2470
2471        if (unlikely(in_migration)) {
2472            if (!cpu_physical_memory_is_dirty(addr1)) {
2473                /* invalidate code */
2474                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2475                /* set dirty bit */
2476                cpu_physical_memory_set_dirty_flags(
2477                    addr1, (0xff & ~CODE_DIRTY_FLAG));
2478            }
2479        }
2480    }
2481}
2482
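/* Usage sketch (illustrative): a software page-table walker can update the
 * accessed/dirty bits of a guest PTE without marking the page as holding
 * modified code.  PTE_ACCESSED is a hypothetical target-specific bit.
 *
 *     uint32_t pte = ldl_phys(pte_addr);
 *     stl_phys_notdirty(pte_addr, pte | PTE_ACCESSED);
 */
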
2483/* warning: addr must be aligned */
2484static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2485                                     enum device_endian endian)
2486{
2487    uint8_t *ptr;
2488    MemoryRegion *mr;
2489    hwaddr l = 4;
2490    hwaddr addr1;
2491
2492    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2493                                 true);
2494    if (l < 4 || !memory_access_is_direct(mr, true)) {
2495#if defined(TARGET_WORDS_BIGENDIAN)
2496        if (endian == DEVICE_LITTLE_ENDIAN) {
2497            val = bswap32(val);
2498        }
2499#else
2500        if (endian == DEVICE_BIG_ENDIAN) {
2501            val = bswap32(val);
2502        }
2503#endif
2504        io_mem_write(mr, addr1, val, 4);
2505    } else {
2506        /* RAM case */
2507        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2508        ptr = qemu_get_ram_ptr(addr1);
2509        switch (endian) {
2510        case DEVICE_LITTLE_ENDIAN:
2511            stl_le_p(ptr, val);
2512            break;
2513        case DEVICE_BIG_ENDIAN:
2514            stl_be_p(ptr, val);
2515            break;
2516        default:
2517            stl_p(ptr, val);
2518            break;
2519        }
2520        invalidate_and_set_dirty(addr1, 4);
2521    }
2522}
2523
2524void stl_phys(hwaddr addr, uint32_t val)
2525{
2526    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2527}
2528
2529void stl_le_phys(hwaddr addr, uint32_t val)
2530{
2531    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2532}
2533
2534void stl_be_phys(hwaddr addr, uint32_t val)
2535{
2536    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2537}
2538
2539/* XXX: optimize */
2540void stb_phys(hwaddr addr, uint32_t val)
2541{
2542    uint8_t v = val;
2543    cpu_physical_memory_write(addr, &v, 1);
2544}
2545
2546/* warning: addr must be aligned */
2547static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2548                                     enum device_endian endian)
2549{
2550    uint8_t *ptr;
2551    MemoryRegion *mr;
2552    hwaddr l = 2;
2553    hwaddr addr1;
2554
2555    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2556                                 true);
2557    if (l < 2 || !memory_access_is_direct(mr, true)) {
2558#if defined(TARGET_WORDS_BIGENDIAN)
2559        if (endian == DEVICE_LITTLE_ENDIAN) {
2560            val = bswap16(val);
2561        }
2562#else
2563        if (endian == DEVICE_BIG_ENDIAN) {
2564            val = bswap16(val);
2565        }
2566#endif
2567        io_mem_write(mr, addr1, val, 2);
2568    } else {
2569        /* RAM case */
2570        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2571        ptr = qemu_get_ram_ptr(addr1);
2572        switch (endian) {
2573        case DEVICE_LITTLE_ENDIAN:
2574            stw_le_p(ptr, val);
2575            break;
2576        case DEVICE_BIG_ENDIAN:
2577            stw_be_p(ptr, val);
2578            break;
2579        default:
2580            stw_p(ptr, val);
2581            break;
2582        }
2583        invalidate_and_set_dirty(addr1, 2);
2584    }
2585}
2586
2587void stw_phys(hwaddr addr, uint32_t val)
2588{
2589    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2590}
2591
2592void stw_le_phys(hwaddr addr, uint32_t val)
2593{
2594    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2595}
2596
2597void stw_be_phys(hwaddr addr, uint32_t val)
2598{
2599    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2600}
2601
2602/* XXX: optimize */
2603void stq_phys(hwaddr addr, uint64_t val)
2604{
2605    val = tswap64(val);
2606    cpu_physical_memory_write(addr, &val, 8);
2607}
2608
2609void stq_le_phys(hwaddr addr, uint64_t val)
2610{
2611    val = cpu_to_le64(val);
2612    cpu_physical_memory_write(addr, &val, 8);
2613}
2614
2615void stq_be_phys(hwaddr addr, uint64_t val)
2616{
2617    val = cpu_to_be64(val);
2618    cpu_physical_memory_write(addr, &val, 8);
2619}
2620
2621/* virtual memory access for debug (includes writing to ROM) */
2622int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2623                        uint8_t *buf, int len, int is_write)
2624{
2625    int l;
2626    hwaddr phys_addr;
2627    target_ulong page;
2628
2629    while (len > 0) {
2630        page = addr & TARGET_PAGE_MASK;
2631        phys_addr = cpu_get_phys_page_debug(cpu, page);
2632        /* if no physical page mapped, return an error */
2633        if (phys_addr == -1)
2634            return -1;
2635        l = (page + TARGET_PAGE_SIZE) - addr;
2636        if (l > len)
2637            l = len;
2638        phys_addr += (addr & ~TARGET_PAGE_MASK);
2639        if (is_write)
2640            cpu_physical_memory_write_rom(phys_addr, buf, l);
2641        else
2642            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2643        len -= l;
2644        buf += l;
2645        addr += l;
2646    }
2647    return 0;
2648}
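
/* Usage sketch (illustrative): debugger-style access to guest virtual
 * memory, e.g. reading 16 bytes at a guest virtual address:
 *
 *     uint8_t mem[16];
 *     if (cpu_memory_rw_debug(cpu, vaddr, mem, sizeof(mem), 0) < 0) {
 *         // the address is not mapped by the guest page tables
 *     }
 */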
2649#endif
2650
2651#if !defined(CONFIG_USER_ONLY)
2652
2653/*
2654 * A helper function for the _utterly broken_ virtio device model to find
2655 * out if it's running on a big-endian machine. Don't do this at home, kids!
2656 */
2657bool virtio_is_big_endian(void);
2658bool virtio_is_big_endian(void)
2659{
2660#if defined(TARGET_WORDS_BIGENDIAN)
2661    return true;
2662#else
2663    return false;
2664#endif
2665}
2666
2667#endif
2668
2669#ifndef CONFIG_USER_ONLY
2670bool cpu_physical_memory_is_io(hwaddr phys_addr)
2671{
2672    MemoryRegion *mr;
2673    hwaddr l = 1;
2674
2675    mr = address_space_translate(&address_space_memory,
2676                                 phys_addr, &phys_addr, &l, false);
2677
2678    return !(memory_region_is_ram(mr) ||
2679             memory_region_is_romd(mr));
2680}
2681
2682void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2683{
2684    RAMBlock *block;
2685
2686    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2687        func(block->host, block->offset, block->length, opaque);
2688    }
2689}
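
/* Usage sketch (illustrative): walking every RAM block, assuming the
 * RAMBlockIterFunc typedef matches the call above, i.e. a callback taking
 * (host pointer, ram_addr_t offset, ram_addr_t length, opaque).
 *
 *     static void count_block(void *host, ram_addr_t offset,
 *                             ram_addr_t length, void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;
 *     }
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(count_block, &total);
 */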
2690#endif
2691