qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifndef _WIN32
  21#include <sys/types.h>
  22#include <sys/mman.h>
  23#endif
  24
  25#include "qemu-common.h"
  26#include "cpu.h"
  27#include "tcg.h"
  28#include "hw/hw.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#endif
  32#include "hw/qdev.h"
  33#include "qemu/osdep.h"
  34#include "sysemu/kvm.h"
  35#include "sysemu/sysemu.h"
  36#include "hw/xen/xen.h"
  37#include "qemu/timer.h"
  38#include "qemu/config-file.h"
  39#include "qemu/error-report.h"
  40#include "exec/memory.h"
  41#include "sysemu/dma.h"
  42#include "exec/address-spaces.h"
  43#if defined(CONFIG_USER_ONLY)
  44#include <qemu.h>
  45#else /* !CONFIG_USER_ONLY */
  46#include "sysemu/xen-mapcache.h"
  47#include "trace.h"
  48#endif
  49#include "exec/cpu-all.h"
  50#include "qemu/rcu_queue.h"
  51#include "qemu/main-loop.h"
  52#include "exec/cputlb.h"
  53#include "translate-all.h"
  54
  55#include "exec/memory-internal.h"
  56#include "exec/ram_addr.h"
  57
  58#include "qemu/range.h"
  59
  60//#define DEBUG_SUBPAGE
  61
  62#if !defined(CONFIG_USER_ONLY)
  63/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  64 * are protected by the ramlist lock.
  65 */
  66RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  67
  68static MemoryRegion *system_memory;
  69static MemoryRegion *system_io;
  70
  71AddressSpace address_space_io;
  72AddressSpace address_space_memory;
  73
  74MemoryRegion io_mem_rom, io_mem_notdirty;
  75static MemoryRegion io_mem_unassigned;
  76
  77/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  78#define RAM_PREALLOC   (1 << 0)
  79
  80/* RAM is mmap-ed with MAP_SHARED */
  81#define RAM_SHARED     (1 << 1)
  82
  83/* Only a portion of RAM (used_length) is actually used, and migrated.
  84 * This used_length size can change across reboots.
  85 */
  86#define RAM_RESIZEABLE (1 << 2)
  87
  88#endif
  89
  90struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  91/* current CPU in the current thread. It is only valid inside
  92   cpu_exec() */
  93DEFINE_TLS(CPUState *, current_cpu);
  94/* 0 = Do not count executed instructions.
  95   1 = Precise instruction counting.
  96   2 = Adaptive rate instruction counting.  */
  97int use_icount;
  98
  99#if !defined(CONFIG_USER_ONLY)
 100
 101typedef struct PhysPageEntry PhysPageEntry;
 102
 103struct PhysPageEntry {
  104    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
  105    uint32_t skip : 6;
  106    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 107    uint32_t ptr : 26;
 108};
 109
 110#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 111
 112/* Size of the L2 (and L3, etc) page tables.  */
 113#define ADDR_SPACE_BITS 64
 114
 115#define P_L2_BITS 9
 116#define P_L2_SIZE (1 << P_L2_BITS)
 117
 118#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 119
 120typedef PhysPageEntry Node[P_L2_SIZE];
 121
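/*
 * Illustrative arithmetic for P_L2_LEVELS, assuming a 4 KiB target page
 * (TARGET_PAGE_BITS == 12; the real value is target-dependent):
 *
 *     P_L2_LEVELS = ((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1
 *                 = ((64 - 12 - 1) / 9) + 1
 *                 = (51 / 9) + 1
 *                 = 6
 *
 * so a full 64-bit physical address is resolved through six 512-entry
 * tables, each level consuming P_L2_BITS == 9 index bits above the page
 * offset.
 */
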
 122typedef struct PhysPageMap {
 123    struct rcu_head rcu;
 124
 125    unsigned sections_nb;
 126    unsigned sections_nb_alloc;
 127    unsigned nodes_nb;
 128    unsigned nodes_nb_alloc;
 129    Node *nodes;
 130    MemoryRegionSection *sections;
 131} PhysPageMap;
 132
 133struct AddressSpaceDispatch {
 134    struct rcu_head rcu;
 135
 136    /* This is a multi-level map on the physical address space.
 137     * The bottom level has pointers to MemoryRegionSections.
 138     */
 139    PhysPageEntry phys_map;
 140    PhysPageMap map;
 141    AddressSpace *as;
 142};
 143
 144#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 145typedef struct subpage_t {
 146    MemoryRegion iomem;
 147    AddressSpace *as;
 148    hwaddr base;
 149    uint16_t sub_section[TARGET_PAGE_SIZE];
 150} subpage_t;
 151
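/*
 * Illustration of the subpage structure above, assuming TARGET_PAGE_SIZE ==
 * 4096: SUBPAGE_IDX() keeps only the in-page offset, e.g.
 * SUBPAGE_IDX(0x12345) == 0x345, and sub_section[] holds one section index
 * per byte offset, so a page shared by several MemoryRegionSections can
 * dispatch each byte to the right one.
 */
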
 152#define PHYS_SECTION_UNASSIGNED 0
 153#define PHYS_SECTION_NOTDIRTY 1
 154#define PHYS_SECTION_ROM 2
 155#define PHYS_SECTION_WATCH 3
 156
 157static void io_mem_init(void);
 158static void memory_map_init(void);
 159static void tcg_commit(MemoryListener *listener);
 160
 161static MemoryRegion io_mem_watch;
 162#endif
 163
 164#if !defined(CONFIG_USER_ONLY)
 165
 166static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 167{
 168    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 169        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 170        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 171        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 172    }
 173}
 174
 175static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 176{
 177    unsigned i;
 178    uint32_t ret;
 179    PhysPageEntry e;
 180    PhysPageEntry *p;
 181
 182    ret = map->nodes_nb++;
 183    p = map->nodes[ret];
 184    assert(ret != PHYS_MAP_NODE_NIL);
 185    assert(ret != map->nodes_nb_alloc);
 186
 187    e.skip = leaf ? 0 : 1;
 188    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 189    for (i = 0; i < P_L2_SIZE; ++i) {
 190        memcpy(&p[i], &e, sizeof(e));
 191    }
 192    return ret;
 193}
 194
 195static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 196                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 197                                int level)
 198{
 199    PhysPageEntry *p;
 200    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 201
 202    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 203        lp->ptr = phys_map_node_alloc(map, level == 0);
 204    }
 205    p = map->nodes[lp->ptr];
 206    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 207
 208    while (*nb && lp < &p[P_L2_SIZE]) {
 209        if ((*index & (step - 1)) == 0 && *nb >= step) {
 210            lp->skip = 0;
 211            lp->ptr = leaf;
 212            *index += step;
 213            *nb -= step;
 214        } else {
 215            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 216        }
 217        ++lp;
 218    }
 219}
 220
 221static void phys_page_set(AddressSpaceDispatch *d,
 222                          hwaddr index, hwaddr nb,
 223                          uint16_t leaf)
 224{
 225    /* Wildly overreserve - it doesn't matter much. */
 226    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 227
 228    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 229}
 230
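/*
 * Worked example for phys_page_set_level(), assuming P_L2_BITS == 9
 * (P_L2_SIZE == 512): a call such as
 *
 *     phys_page_set(d, 0x200, 0x200, leaf);
 *
 * covers exactly one aligned run of 512 pages, so at level 1 the test
 * "(*index & (step - 1)) == 0 && *nb >= step" succeeds with step == 0x200
 * and a single level-1 entry is pointed at "leaf", instead of recursing
 * into 512 separate level-0 leaves.
 */
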
  231/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 232 * and update our entry so we can skip it and go directly to the destination.
 233 */
 234static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 235{
 236    unsigned valid_ptr = P_L2_SIZE;
 237    int valid = 0;
 238    PhysPageEntry *p;
 239    int i;
 240
 241    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 242        return;
 243    }
 244
 245    p = nodes[lp->ptr];
 246    for (i = 0; i < P_L2_SIZE; i++) {
 247        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 248            continue;
 249        }
 250
 251        valid_ptr = i;
 252        valid++;
 253        if (p[i].skip) {
 254            phys_page_compact(&p[i], nodes, compacted);
 255        }
 256    }
 257
 258    /* We can only compress if there's only one child. */
 259    if (valid != 1) {
 260        return;
 261    }
 262
 263    assert(valid_ptr < P_L2_SIZE);
 264
 265    /* Don't compress if it won't fit in the # of bits we have. */
 266    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 267        return;
 268    }
 269
 270    lp->ptr = p[valid_ptr].ptr;
 271    if (!p[valid_ptr].skip) {
 272        /* If our only child is a leaf, make this a leaf. */
 273        /* By design, we should have made this node a leaf to begin with so we
 274         * should never reach here.
 275         * But since it's so simple to handle this, let's do it just in case we
 276         * change this rule.
 277         */
 278        lp->skip = 0;
 279    } else {
 280        lp->skip += p[valid_ptr].skip;
 281    }
 282}
 283
 284static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 285{
 286    DECLARE_BITMAP(compacted, nodes_nb);
 287
 288    if (d->phys_map.skip) {
 289        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 290    }
 291}
 292
 293static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 294                                           Node *nodes, MemoryRegionSection *sections)
 295{
 296    PhysPageEntry *p;
 297    hwaddr index = addr >> TARGET_PAGE_BITS;
 298    int i;
 299
 300    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 301        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 302            return &sections[PHYS_SECTION_UNASSIGNED];
 303        }
 304        p = nodes[lp.ptr];
 305        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 306    }
 307
 308    if (sections[lp.ptr].size.hi ||
 309        range_covers_byte(sections[lp.ptr].offset_within_address_space,
 310                          sections[lp.ptr].size.lo, addr)) {
 311        return &sections[lp.ptr];
 312    } else {
 313        return &sections[PHYS_SECTION_UNASSIGNED];
 314    }
 315}
 316
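/*
 * Sketch of how the lookup loop above consumes "skip", using the
 * illustrative P_L2_LEVELS == 6 from a 4 KiB target page: i starts at 6;
 * an entry with skip == 1 drops i to 5 and indexes the child table with
 * (index >> (5 * P_L2_BITS)) & (P_L2_SIZE - 1); an entry compacted to
 * skip == 3 jumps three levels in one step, so sparse address spaces need
 * far fewer node dereferences per lookup.
 */
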
 317bool memory_region_is_unassigned(MemoryRegion *mr)
 318{
 319    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 320        && mr != &io_mem_watch;
 321}
 322
 323/* Called from RCU critical section */
 324static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 325                                                        hwaddr addr,
 326                                                        bool resolve_subpage)
 327{
 328    MemoryRegionSection *section;
 329    subpage_t *subpage;
 330
 331    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
 332    if (resolve_subpage && section->mr->subpage) {
 333        subpage = container_of(section->mr, subpage_t, iomem);
 334        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 335    }
 336    return section;
 337}
 338
 339/* Called from RCU critical section */
 340static MemoryRegionSection *
 341address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 342                                 hwaddr *plen, bool resolve_subpage)
 343{
 344    MemoryRegionSection *section;
 345    MemoryRegion *mr;
 346    Int128 diff;
 347
 348    section = address_space_lookup_region(d, addr, resolve_subpage);
 349    /* Compute offset within MemoryRegionSection */
 350    addr -= section->offset_within_address_space;
 351
 352    /* Compute offset within MemoryRegion */
 353    *xlat = addr + section->offset_within_region;
 354
 355    mr = section->mr;
 356
 357    /* MMIO registers can be expected to perform full-width accesses based only
 358     * on their address, without considering adjacent registers that could
 359     * decode to completely different MemoryRegions.  When such registers
 360     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 361     * regions overlap wildly.  For this reason we cannot clamp the accesses
 362     * here.
 363     *
 364     * If the length is small (as is the case for address_space_ldl/stl),
 365     * everything works fine.  If the incoming length is large, however,
 366     * the caller really has to do the clamping through memory_access_size.
 367     */
 368    if (memory_region_is_ram(mr)) {
 369        diff = int128_sub(section->size, int128_make64(addr));
 370        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 371    }
 372    return section;
 373}
 374
 375static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 376{
 377    if (memory_region_is_ram(mr)) {
 378        return !(is_write && mr->readonly);
 379    }
 380    if (memory_region_is_romd(mr)) {
 381        return !is_write;
 382    }
 383
 384    return false;
 385}
 386
 387/* Called from RCU critical section */
 388MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 389                                      hwaddr *xlat, hwaddr *plen,
 390                                      bool is_write)
 391{
 392    IOMMUTLBEntry iotlb;
 393    MemoryRegionSection *section;
 394    MemoryRegion *mr;
 395
 396    for (;;) {
 397        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 398        section = address_space_translate_internal(d, addr, &addr, plen, true);
 399        mr = section->mr;
 400
 401        if (!mr->iommu_ops) {
 402            break;
 403        }
 404
 405        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 406        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 407                | (addr & iotlb.addr_mask));
 408        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 409        if (!(iotlb.perm & (1 << is_write))) {
 410            mr = &io_mem_unassigned;
 411            break;
 412        }
 413
 414        as = iotlb.target_as;
 415    }
 416
 417    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 418        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 419        *plen = MIN(page, *plen);
 420    }
 421
 422    *xlat = addr;
 423    return mr;
 424}
 425
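/*
 * Illustrative IOMMU recombination for the loop above, assuming a 4 KiB
 * IOMMU page (iotlb.addr_mask == 0xfff): with addr == 0x12345 and
 * iotlb.translated_addr == 0xabcd0000,
 *
 *     addr = (0xabcd0000 & ~0xfff) | (0x12345 & 0xfff) = 0xabcd0345
 *
 * and *plen is clamped to at most (addr | 0xfff) - addr + 1 == 0xcbb bytes,
 * so the access cannot cross the translated page.
 */
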
 426/* Called from RCU critical section */
 427MemoryRegionSection *
 428address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
 429                                  hwaddr *xlat, hwaddr *plen)
 430{
 431    MemoryRegionSection *section;
 432    section = address_space_translate_internal(cpu->memory_dispatch,
 433                                               addr, xlat, plen, false);
 434
 435    assert(!section->mr->iommu_ops);
 436    return section;
 437}
 438#endif
 439
 440#if !defined(CONFIG_USER_ONLY)
 441
 442static int cpu_common_post_load(void *opaque, int version_id)
 443{
 444    CPUState *cpu = opaque;
 445
 446    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 447       version_id is increased. */
 448    cpu->interrupt_request &= ~0x01;
 449    tlb_flush(cpu, 1);
 450
 451    return 0;
 452}
 453
 454static int cpu_common_pre_load(void *opaque)
 455{
 456    CPUState *cpu = opaque;
 457
 458    cpu->exception_index = -1;
 459
 460    return 0;
 461}
 462
 463static bool cpu_common_exception_index_needed(void *opaque)
 464{
 465    CPUState *cpu = opaque;
 466
 467    return tcg_enabled() && cpu->exception_index != -1;
 468}
 469
 470static const VMStateDescription vmstate_cpu_common_exception_index = {
 471    .name = "cpu_common/exception_index",
 472    .version_id = 1,
 473    .minimum_version_id = 1,
 474    .needed = cpu_common_exception_index_needed,
 475    .fields = (VMStateField[]) {
 476        VMSTATE_INT32(exception_index, CPUState),
 477        VMSTATE_END_OF_LIST()
 478    }
 479};
 480
 481const VMStateDescription vmstate_cpu_common = {
 482    .name = "cpu_common",
 483    .version_id = 1,
 484    .minimum_version_id = 1,
 485    .pre_load = cpu_common_pre_load,
 486    .post_load = cpu_common_post_load,
 487    .fields = (VMStateField[]) {
 488        VMSTATE_UINT32(halted, CPUState),
 489        VMSTATE_UINT32(interrupt_request, CPUState),
 490        VMSTATE_END_OF_LIST()
 491    },
 492    .subsections = (const VMStateDescription*[]) {
 493        &vmstate_cpu_common_exception_index,
 494        NULL
 495    }
 496};
 497
 498#endif
 499
 500CPUState *qemu_get_cpu(int index)
 501{
 502    CPUState *cpu;
 503
 504    CPU_FOREACH(cpu) {
 505        if (cpu->cpu_index == index) {
 506            return cpu;
 507        }
 508    }
 509
 510    return NULL;
 511}
 512
 513#if !defined(CONFIG_USER_ONLY)
 514void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
 515{
 516    /* We only support one address space per cpu at the moment.  */
 517    assert(cpu->as == as);
 518
 519    if (cpu->tcg_as_listener) {
 520        memory_listener_unregister(cpu->tcg_as_listener);
 521    } else {
 522        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
 523    }
 524    cpu->tcg_as_listener->commit = tcg_commit;
 525    memory_listener_register(cpu->tcg_as_listener, as);
 526}
 527#endif
 528
 529#ifndef CONFIG_USER_ONLY
 530static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 531
 532static int cpu_get_free_index(Error **errp)
 533{
 534    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 535
 536    if (cpu >= MAX_CPUMASK_BITS) {
 537        error_setg(errp, "Trying to use more CPUs than max of %d",
 538                   MAX_CPUMASK_BITS);
 539        return -1;
 540    }
 541
 542    bitmap_set(cpu_index_map, cpu, 1);
 543    return cpu;
 544}
 545
 546void cpu_exec_exit(CPUState *cpu)
 547{
 548    if (cpu->cpu_index == -1) {
 549        /* cpu_index was never allocated by this @cpu or was already freed. */
 550        return;
 551    }
 552
 553    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 554    cpu->cpu_index = -1;
 555}
 556#else
 557
 558static int cpu_get_free_index(Error **errp)
 559{
 560    CPUState *some_cpu;
 561    int cpu_index = 0;
 562
 563    CPU_FOREACH(some_cpu) {
 564        cpu_index++;
 565    }
 566    return cpu_index;
 567}
 568
 569void cpu_exec_exit(CPUState *cpu)
 570{
 571}
 572#endif
 573
 574void cpu_exec_init(CPUState *cpu, Error **errp)
 575{
 576    CPUClass *cc = CPU_GET_CLASS(cpu);
 577    int cpu_index;
 578    Error *local_err = NULL;
 579
 580#ifndef CONFIG_USER_ONLY
 581    cpu->as = &address_space_memory;
 582    cpu->thread_id = qemu_get_thread_id();
 583    cpu_reload_memory_map(cpu);
 584#endif
 585
 586#if defined(CONFIG_USER_ONLY)
 587    cpu_list_lock();
 588#endif
 589    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
 590    if (local_err) {
 591        error_propagate(errp, local_err);
 592#if defined(CONFIG_USER_ONLY)
 593        cpu_list_unlock();
 594#endif
 595        return;
 596    }
 597    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 598#if defined(CONFIG_USER_ONLY)
 599    cpu_list_unlock();
 600#endif
 601    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 602        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 603    }
 604#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 605    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 606                    cpu_save, cpu_load, cpu->env_ptr);
 607    assert(cc->vmsd == NULL);
 608    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
 609#endif
 610    if (cc->vmsd != NULL) {
 611        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 612    }
 613}
 614
 615#if defined(CONFIG_USER_ONLY)
 616static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 617{
 618    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 619}
 620#else
 621static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 622{
 623    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
 624    if (phys != -1) {
 625        tb_invalidate_phys_addr(cpu->as,
 626                                phys | (pc & ~TARGET_PAGE_MASK));
 627    }
 628}
 629#endif
 630
 631#if defined(CONFIG_USER_ONLY)
 632void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 633
 634{
 635}
 636
 637int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 638                          int flags)
 639{
 640    return -ENOSYS;
 641}
 642
 643void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 644{
 645}
 646
 647int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 648                          int flags, CPUWatchpoint **watchpoint)
 649{
 650    return -ENOSYS;
 651}
 652#else
 653/* Add a watchpoint.  */
 654int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 655                          int flags, CPUWatchpoint **watchpoint)
 656{
 657    CPUWatchpoint *wp;
 658
 659    /* forbid ranges which are empty or run off the end of the address space */
 660    if (len == 0 || (addr + len - 1) < addr) {
 661        error_report("tried to set invalid watchpoint at %"
 662                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 663        return -EINVAL;
 664    }
 665    wp = g_malloc(sizeof(*wp));
 666
 667    wp->vaddr = addr;
 668    wp->len = len;
 669    wp->flags = flags;
 670
 671    /* keep all GDB-injected watchpoints in front */
 672    if (flags & BP_GDB) {
 673        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 674    } else {
 675        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 676    }
 677
 678    tlb_flush_page(cpu, addr);
 679
 680    if (watchpoint)
 681        *watchpoint = wp;
 682    return 0;
 683}
 684
 685/* Remove a specific watchpoint.  */
 686int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 687                          int flags)
 688{
 689    CPUWatchpoint *wp;
 690
 691    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 692        if (addr == wp->vaddr && len == wp->len
 693                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 694            cpu_watchpoint_remove_by_ref(cpu, wp);
 695            return 0;
 696        }
 697    }
 698    return -ENOENT;
 699}
 700
 701/* Remove a specific watchpoint by reference.  */
 702void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 703{
 704    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 705
 706    tlb_flush_page(cpu, watchpoint->vaddr);
 707
 708    g_free(watchpoint);
 709}
 710
 711/* Remove all matching watchpoints.  */
 712void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 713{
 714    CPUWatchpoint *wp, *next;
 715
 716    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 717        if (wp->flags & mask) {
 718            cpu_watchpoint_remove_by_ref(cpu, wp);
 719        }
 720    }
 721}
 722
 723/* Return true if this watchpoint address matches the specified
 724 * access (ie the address range covered by the watchpoint overlaps
 725 * partially or completely with the address range covered by the
 726 * access).
 727 */
 728static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 729                                                  vaddr addr,
 730                                                  vaddr len)
 731{
 732    /* We know the lengths are non-zero, but a little caution is
 733     * required to avoid errors in the case where the range ends
 734     * exactly at the top of the address space and so addr + len
 735     * wraps round to zero.
 736     */
 737    vaddr wpend = wp->vaddr + wp->len - 1;
 738    vaddr addrend = addr + len - 1;
 739
 740    return !(addr > wpend || wp->vaddr > addrend);
 741}
 742
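/*
 * Worked example of the wrap-safe comparison above, assuming 64-bit vaddr:
 * a watchpoint with wp->vaddr == 0xfffffffffffff000 and wp->len == 0x1000
 * has wpend == 0xffffffffffffffff; an access at addr == 0xfffffffffffff800
 * with len == 0x100 has addrend == 0xfffffffffffff8ff, so the ranges
 * overlap and the function returns true.  Using exclusive ends
 * (wp->vaddr + wp->len) would wrap to 0 here and miss the match.
 */
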
 743#endif
 744
 745/* Add a breakpoint.  */
 746int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 747                          CPUBreakpoint **breakpoint)
 748{
 749    CPUBreakpoint *bp;
 750
 751    bp = g_malloc(sizeof(*bp));
 752
 753    bp->pc = pc;
 754    bp->flags = flags;
 755
 756    /* keep all GDB-injected breakpoints in front */
 757    if (flags & BP_GDB) {
 758        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 759    } else {
 760        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 761    }
 762
 763    breakpoint_invalidate(cpu, pc);
 764
 765    if (breakpoint) {
 766        *breakpoint = bp;
 767    }
 768    return 0;
 769}
 770
 771/* Remove a specific breakpoint.  */
 772int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 773{
 774    CPUBreakpoint *bp;
 775
 776    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 777        if (bp->pc == pc && bp->flags == flags) {
 778            cpu_breakpoint_remove_by_ref(cpu, bp);
 779            return 0;
 780        }
 781    }
 782    return -ENOENT;
 783}
 784
 785/* Remove a specific breakpoint by reference.  */
 786void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 787{
 788    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 789
 790    breakpoint_invalidate(cpu, breakpoint->pc);
 791
 792    g_free(breakpoint);
 793}
 794
 795/* Remove all matching breakpoints. */
 796void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 797{
 798    CPUBreakpoint *bp, *next;
 799
 800    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 801        if (bp->flags & mask) {
 802            cpu_breakpoint_remove_by_ref(cpu, bp);
 803        }
 804    }
 805}
 806
 807/* enable or disable single step mode. EXCP_DEBUG is returned by the
 808   CPU loop after each instruction */
 809void cpu_single_step(CPUState *cpu, int enabled)
 810{
 811    if (cpu->singlestep_enabled != enabled) {
 812        cpu->singlestep_enabled = enabled;
 813        if (kvm_enabled()) {
 814            kvm_update_guest_debug(cpu, 0);
 815        } else {
 816            /* must flush all the translated code to avoid inconsistencies */
 817            /* XXX: only flush what is necessary */
 818            tb_flush(cpu);
 819        }
 820    }
 821}
 822
 823void cpu_abort(CPUState *cpu, const char *fmt, ...)
 824{
 825    va_list ap;
 826    va_list ap2;
 827
 828    va_start(ap, fmt);
 829    va_copy(ap2, ap);
 830    fprintf(stderr, "qemu: fatal: ");
 831    vfprintf(stderr, fmt, ap);
 832    fprintf(stderr, "\n");
 833    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 834    if (qemu_log_enabled()) {
 835        qemu_log("qemu: fatal: ");
 836        qemu_log_vprintf(fmt, ap2);
 837        qemu_log("\n");
 838        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 839        qemu_log_flush();
 840        qemu_log_close();
 841    }
 842    va_end(ap2);
 843    va_end(ap);
 844#if defined(CONFIG_USER_ONLY)
 845    {
 846        struct sigaction act;
 847        sigfillset(&act.sa_mask);
 848        act.sa_handler = SIG_DFL;
 849        sigaction(SIGABRT, &act, NULL);
 850    }
 851#endif
 852    abort();
 853}
 854
 855#if !defined(CONFIG_USER_ONLY)
 856/* Called from RCU critical section */
 857static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 858{
 859    RAMBlock *block;
 860
 861    block = atomic_rcu_read(&ram_list.mru_block);
 862    if (block && addr - block->offset < block->max_length) {
 863        goto found;
 864    }
 865    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 866        if (addr - block->offset < block->max_length) {
 867            goto found;
 868        }
 869    }
 870
 871    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 872    abort();
 873
 874found:
 875    /* It is safe to write mru_block outside the iothread lock.  This
 876     * is what happens:
 877     *
 878     *     mru_block = xxx
 879     *     rcu_read_unlock()
 880     *                                        xxx removed from list
 881     *                  rcu_read_lock()
 882     *                  read mru_block
 883     *                                        mru_block = NULL;
 884     *                                        call_rcu(reclaim_ramblock, xxx);
 885     *                  rcu_read_unlock()
 886     *
 887     * atomic_rcu_set is not needed here.  The block was already published
 888     * when it was placed into the list.  Here we're just making an extra
 889     * copy of the pointer.
 890     */
 891    ram_list.mru_block = block;
 892    return block;
 893}
 894
 895static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 896{
 897    ram_addr_t start1;
 898    RAMBlock *block;
 899    ram_addr_t end;
 900
 901    end = TARGET_PAGE_ALIGN(start + length);
 902    start &= TARGET_PAGE_MASK;
 903
 904    rcu_read_lock();
 905    block = qemu_get_ram_block(start);
 906    assert(block == qemu_get_ram_block(end - 1));
 907    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 908    cpu_tlb_reset_dirty_all(start1, length);
 909    rcu_read_unlock();
 910}
 911
 912/* Note: start and end must be within the same ram block.  */
 913bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 914                                              ram_addr_t length,
 915                                              unsigned client)
 916{
 917    unsigned long end, page;
 918    bool dirty;
 919
 920    if (length == 0) {
 921        return false;
 922    }
 923
 924    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
 925    page = start >> TARGET_PAGE_BITS;
 926    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
 927                                         page, end - page);
 928
 929    if (dirty && tcg_enabled()) {
 930        tlb_reset_dirty_range_all(start, length);
 931    }
 932
 933    return dirty;
 934}
 935
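/*
 * Worked example, assuming a 4 KiB target page: start == 0x2345 and
 * length == 0x3000 give page == 2 and end == TARGET_PAGE_ALIGN(0x5345) >> 12
 * == 6, so dirty bits 2..5 of the client's bitmap are tested and cleared
 * atomically.
 */
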
 936/* Called from RCU critical section */
 937hwaddr memory_region_section_get_iotlb(CPUState *cpu,
 938                                       MemoryRegionSection *section,
 939                                       target_ulong vaddr,
 940                                       hwaddr paddr, hwaddr xlat,
 941                                       int prot,
 942                                       target_ulong *address)
 943{
 944    hwaddr iotlb;
 945    CPUWatchpoint *wp;
 946
 947    if (memory_region_is_ram(section->mr)) {
 948        /* Normal RAM.  */
 949        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
 950            + xlat;
 951        if (!section->readonly) {
 952            iotlb |= PHYS_SECTION_NOTDIRTY;
 953        } else {
 954            iotlb |= PHYS_SECTION_ROM;
 955        }
 956    } else {
 957        AddressSpaceDispatch *d;
 958
 959        d = atomic_rcu_read(&section->address_space->dispatch);
 960        iotlb = section - d->map.sections;
 961        iotlb += xlat;
 962    }
 963
 964    /* Make accesses to pages with watchpoints go via the
 965       watchpoint trap routines.  */
 966    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 967        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
 968            /* Avoid trapping reads of pages with a write breakpoint. */
 969            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
 970                iotlb = PHYS_SECTION_WATCH + paddr;
 971                *address |= TLB_MMIO;
 972                break;
 973            }
 974        }
 975    }
 976
 977    return iotlb;
 978}
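
/*
 * Illustration of the iotlb encoding produced above: for RAM the value is a
 * ram_addr with a small section number ORed into its low bits (e.g.
 * "| PHYS_SECTION_NOTDIRTY" sets bit 0); phys_section_add() asserts
 * sections_nb < TARGET_PAGE_SIZE precisely so that index can never spill
 * into the page-aligned part.  For non-RAM regions the value is instead the
 * section's index within d->map.sections plus xlat.
 */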
 979#endif /* defined(CONFIG_USER_ONLY) */
 980
 981#if !defined(CONFIG_USER_ONLY)
 982
 983static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
 984                             uint16_t section);
 985static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
 986
 987static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
 988                               qemu_anon_ram_alloc;
 989
 990/*
  991 * Set a custom physical guest memory allocator.
 992 * Accelerators with unusual needs may need this.  Hopefully, we can
 993 * get rid of it eventually.
 994 */
 995void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
 996{
 997    phys_mem_alloc = alloc;
 998}
 999
1000static uint16_t phys_section_add(PhysPageMap *map,
1001                                 MemoryRegionSection *section)
1002{
1003    /* The physical section number is ORed with a page-aligned
1004     * pointer to produce the iotlb entries.  Thus it should
1005     * never overflow into the page-aligned value.
1006     */
1007    assert(map->sections_nb < TARGET_PAGE_SIZE);
1008
1009    if (map->sections_nb == map->sections_nb_alloc) {
1010        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1011        map->sections = g_renew(MemoryRegionSection, map->sections,
1012                                map->sections_nb_alloc);
1013    }
1014    map->sections[map->sections_nb] = *section;
1015    memory_region_ref(section->mr);
1016    return map->sections_nb++;
1017}
1018
1019static void phys_section_destroy(MemoryRegion *mr)
1020{
1021    memory_region_unref(mr);
1022
1023    if (mr->subpage) {
1024        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1025        object_unref(OBJECT(&subpage->iomem));
1026        g_free(subpage);
1027    }
1028}
1029
1030static void phys_sections_free(PhysPageMap *map)
1031{
1032    while (map->sections_nb > 0) {
1033        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1034        phys_section_destroy(section->mr);
1035    }
1036    g_free(map->sections);
1037    g_free(map->nodes);
1038}
1039
1040static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1041{
1042    subpage_t *subpage;
1043    hwaddr base = section->offset_within_address_space
1044        & TARGET_PAGE_MASK;
1045    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1046                                                   d->map.nodes, d->map.sections);
1047    MemoryRegionSection subsection = {
1048        .offset_within_address_space = base,
1049        .size = int128_make64(TARGET_PAGE_SIZE),
1050    };
1051    hwaddr start, end;
1052
1053    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1054
1055    if (!(existing->mr->subpage)) {
1056        subpage = subpage_init(d->as, base);
1057        subsection.address_space = d->as;
1058        subsection.mr = &subpage->iomem;
1059        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1060                      phys_section_add(&d->map, &subsection));
1061    } else {
1062        subpage = container_of(existing->mr, subpage_t, iomem);
1063    }
1064    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1065    end = start + int128_get64(section->size) - 1;
1066    subpage_register(subpage, start, end,
1067                     phys_section_add(&d->map, section));
1068}
1069
1070
1071static void register_multipage(AddressSpaceDispatch *d,
1072                               MemoryRegionSection *section)
1073{
1074    hwaddr start_addr = section->offset_within_address_space;
1075    uint16_t section_index = phys_section_add(&d->map, section);
1076    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1077                                                    TARGET_PAGE_BITS));
1078
1079    assert(num_pages);
1080    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1081}
1082
1083static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1084{
1085    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1086    AddressSpaceDispatch *d = as->next_dispatch;
1087    MemoryRegionSection now = *section, remain = *section;
1088    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1089
1090    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1091        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1092                       - now.offset_within_address_space;
1093
1094        now.size = int128_min(int128_make64(left), now.size);
1095        register_subpage(d, &now);
1096    } else {
1097        now.size = int128_zero();
1098    }
1099    while (int128_ne(remain.size, now.size)) {
1100        remain.size = int128_sub(remain.size, now.size);
1101        remain.offset_within_address_space += int128_get64(now.size);
1102        remain.offset_within_region += int128_get64(now.size);
1103        now = remain;
1104        if (int128_lt(remain.size, page_size)) {
1105            register_subpage(d, &now);
1106        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1107            now.size = page_size;
1108            register_subpage(d, &now);
1109        } else {
1110            now.size = int128_and(now.size, int128_neg(page_size));
1111            register_multipage(d, &now);
1112        }
1113    }
1114}
1115
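/*
 * Worked example for mem_add(), assuming a 4 KiB target page: a section
 * with offset_within_address_space == 0x1800 and size == 0x3000 is split
 * into a head subpage [0x1800, 0x2000), a page-aligned run [0x2000, 0x4000)
 * registered via register_multipage(), and a tail subpage [0x4000, 0x4800).
 */
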
1116void qemu_flush_coalesced_mmio_buffer(void)
1117{
1118    if (kvm_enabled())
1119        kvm_flush_coalesced_mmio_buffer();
1120}
1121
1122void qemu_mutex_lock_ramlist(void)
1123{
1124    qemu_mutex_lock(&ram_list.mutex);
1125}
1126
1127void qemu_mutex_unlock_ramlist(void)
1128{
1129    qemu_mutex_unlock(&ram_list.mutex);
1130}
1131
1132#ifdef __linux__
1133
1134#include <sys/vfs.h>
1135
1136#define HUGETLBFS_MAGIC       0x958458f6
1137
1138static long gethugepagesize(const char *path, Error **errp)
1139{
1140    struct statfs fs;
1141    int ret;
1142
1143    do {
1144        ret = statfs(path, &fs);
1145    } while (ret != 0 && errno == EINTR);
1146
1147    if (ret != 0) {
1148        error_setg_errno(errp, errno, "failed to get page size of file %s",
1149                         path);
1150        return 0;
1151    }
1152
1153    if (fs.f_type != HUGETLBFS_MAGIC)
1154        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1155
1156    return fs.f_bsize;
1157}
1158
1159static void *file_ram_alloc(RAMBlock *block,
1160                            ram_addr_t memory,
1161                            const char *path,
1162                            Error **errp)
1163{
1164    char *filename;
1165    char *sanitized_name;
1166    char *c;
1167    void *area = NULL;
1168    int fd;
1169    uint64_t hpagesize;
1170    Error *local_err = NULL;
1171
1172    hpagesize = gethugepagesize(path, &local_err);
1173    if (local_err) {
1174        error_propagate(errp, local_err);
1175        goto error;
1176    }
1177    block->mr->align = hpagesize;
1178
1179    if (memory < hpagesize) {
1180        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1181                   "or larger than huge page size 0x%" PRIx64,
1182                   memory, hpagesize);
1183        goto error;
1184    }
1185
1186    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1187        error_setg(errp,
1188                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1189        goto error;
1190    }
1191
1192    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1193    sanitized_name = g_strdup(memory_region_name(block->mr));
1194    for (c = sanitized_name; *c != '\0'; c++) {
1195        if (*c == '/')
1196            *c = '_';
1197    }
1198
1199    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1200                               sanitized_name);
1201    g_free(sanitized_name);
1202
1203    fd = mkstemp(filename);
1204    if (fd < 0) {
1205        error_setg_errno(errp, errno,
1206                         "unable to create backing store for hugepages");
1207        g_free(filename);
1208        goto error;
1209    }
1210    unlink(filename);
1211    g_free(filename);
1212
1213    memory = (memory+hpagesize-1) & ~(hpagesize-1);
1214
1215    /*
1216     * ftruncate is not supported by hugetlbfs in older
1217     * hosts, so don't bother bailing out on errors.
1218     * If anything goes wrong with it under other filesystems,
1219     * mmap will fail.
1220     */
1221    if (ftruncate(fd, memory)) {
1222        perror("ftruncate");
1223    }
1224
1225    area = mmap(0, memory, PROT_READ | PROT_WRITE,
1226                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1227                fd, 0);
1228    if (area == MAP_FAILED) {
1229        error_setg_errno(errp, errno,
1230                         "unable to map backing store for hugepages");
1231        close(fd);
1232        goto error;
1233    }
1234
1235    if (mem_prealloc) {
1236        os_mem_prealloc(fd, area, memory);
1237    }
1238
1239    block->fd = fd;
1240    return area;
1241
1242error:
1243    if (mem_prealloc) {
1244        error_report("%s", error_get_pretty(*errp));
1245        exit(1);
1246    }
1247    return NULL;
1248}
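
/*
 * Note on the rounding above: "memory = (memory+hpagesize-1) & ~(hpagesize-1)"
 * rounds the size up to a whole number of huge pages; e.g. with a 2 MiB huge
 * page, a 3 MiB (0x300000) request becomes 4 MiB (0x400000) before the file
 * is truncated and mmap'ed.
 */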
1249#endif
1250
1251/* Called with the ramlist lock held.  */
1252static ram_addr_t find_ram_offset(ram_addr_t size)
1253{
1254    RAMBlock *block, *next_block;
1255    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1256
1257    assert(size != 0); /* it would hand out same offset multiple times */
1258
1259    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1260        return 0;
1261    }
1262
1263    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1264        ram_addr_t end, next = RAM_ADDR_MAX;
1265
1266        end = block->offset + block->max_length;
1267
1268        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1269            if (next_block->offset >= end) {
1270                next = MIN(next, next_block->offset);
1271            }
1272        }
1273        if (next - end >= size && next - end < mingap) {
1274            offset = end;
1275            mingap = next - end;
1276        }
1277    }
1278
1279    if (offset == RAM_ADDR_MAX) {
1280        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1281                (uint64_t)size);
1282        abort();
1283    }
1284
1285    return offset;
1286}
1287
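/*
 * Worked example for find_ram_offset(): with blocks occupying
 * [0x0, 0x100000) and [0x300000, 0x400000), the gaps are
 * [0x100000, 0x300000) and everything above 0x400000; a request for
 * 0x100000 bytes returns offset 0x100000, because that 0x200000-byte gap is
 * the smallest one that still fits.
 */
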
1288ram_addr_t last_ram_offset(void)
1289{
1290    RAMBlock *block;
1291    ram_addr_t last = 0;
1292
1293    rcu_read_lock();
1294    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1295        last = MAX(last, block->offset + block->max_length);
1296    }
1297    rcu_read_unlock();
1298    return last;
1299}
1300
1301static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1302{
1303    int ret;
1304
 1305    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
1306    if (!machine_dump_guest_core(current_machine)) {
1307        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1308        if (ret) {
1309            perror("qemu_madvise");
1310            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1311                            "but dump_guest_core=off specified\n");
1312        }
1313    }
1314}
1315
1316/* Called within an RCU critical section, or while the ramlist lock
1317 * is held.
1318 */
1319static RAMBlock *find_ram_block(ram_addr_t addr)
1320{
1321    RAMBlock *block;
1322
1323    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1324        if (block->offset == addr) {
1325            return block;
1326        }
1327    }
1328
1329    return NULL;
1330}
1331
1332/* Called with iothread lock held.  */
1333void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1334{
1335    RAMBlock *new_block, *block;
1336
1337    rcu_read_lock();
1338    new_block = find_ram_block(addr);
1339    assert(new_block);
1340    assert(!new_block->idstr[0]);
1341
1342    if (dev) {
1343        char *id = qdev_get_dev_path(dev);
1344        if (id) {
1345            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1346            g_free(id);
1347        }
1348    }
1349    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1350
1351    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1352        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1353            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1354                    new_block->idstr);
1355            abort();
1356        }
1357    }
1358    rcu_read_unlock();
1359}
1360
1361/* Called with iothread lock held.  */
1362void qemu_ram_unset_idstr(ram_addr_t addr)
1363{
1364    RAMBlock *block;
1365
1366    /* FIXME: arch_init.c assumes that this is not called throughout
1367     * migration.  Ignore the problem since hot-unplug during migration
1368     * does not work anyway.
1369     */
1370
1371    rcu_read_lock();
1372    block = find_ram_block(addr);
1373    if (block) {
1374        memset(block->idstr, 0, sizeof(block->idstr));
1375    }
1376    rcu_read_unlock();
1377}
1378
1379static int memory_try_enable_merging(void *addr, size_t len)
1380{
1381    if (!machine_mem_merge(current_machine)) {
1382        /* disabled by the user */
1383        return 0;
1384    }
1385
1386    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1387}
1388
 1389/* Only legal before the guest might have detected the memory size: e.g. on
 1390 * incoming migration, or right after reset.
 1391 *
 1392 * As the memory core doesn't know how the memory is accessed, it is up to
 1393 * the resize callback to update device state and/or add assertions to detect
1394 * misuse, if necessary.
1395 */
1396int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1397{
1398    RAMBlock *block = find_ram_block(base);
1399
1400    assert(block);
1401
1402    newsize = TARGET_PAGE_ALIGN(newsize);
1403
1404    if (block->used_length == newsize) {
1405        return 0;
1406    }
1407
1408    if (!(block->flags & RAM_RESIZEABLE)) {
1409        error_setg_errno(errp, EINVAL,
1410                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1411                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1412                         newsize, block->used_length);
1413        return -EINVAL;
1414    }
1415
1416    if (block->max_length < newsize) {
1417        error_setg_errno(errp, EINVAL,
1418                         "Length too large: %s: 0x" RAM_ADDR_FMT
1419                         " > 0x" RAM_ADDR_FMT, block->idstr,
1420                         newsize, block->max_length);
1421        return -EINVAL;
1422    }
1423
1424    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1425    block->used_length = newsize;
1426    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1427                                        DIRTY_CLIENTS_ALL);
1428    memory_region_set_size(block->mr, newsize);
1429    if (block->resized) {
1430        block->resized(block->idstr, newsize, block->host);
1431    }
1432    return 0;
1433}
1434
1435static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1436{
1437    RAMBlock *block;
1438    RAMBlock *last_block = NULL;
1439    ram_addr_t old_ram_size, new_ram_size;
1440
1441    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1442
1443    qemu_mutex_lock_ramlist();
1444    new_block->offset = find_ram_offset(new_block->max_length);
1445
1446    if (!new_block->host) {
1447        if (xen_enabled()) {
1448            xen_ram_alloc(new_block->offset, new_block->max_length,
1449                          new_block->mr);
1450        } else {
1451            new_block->host = phys_mem_alloc(new_block->max_length,
1452                                             &new_block->mr->align);
1453            if (!new_block->host) {
1454                error_setg_errno(errp, errno,
1455                                 "cannot set up guest memory '%s'",
1456                                 memory_region_name(new_block->mr));
1457                qemu_mutex_unlock_ramlist();
1458                return -1;
1459            }
1460            memory_try_enable_merging(new_block->host, new_block->max_length);
1461        }
1462    }
1463
1464    new_ram_size = MAX(old_ram_size,
1465              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1466    if (new_ram_size > old_ram_size) {
1467        migration_bitmap_extend(old_ram_size, new_ram_size);
1468    }
1469    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1470     * QLIST (which has an RCU-friendly variant) does not have insertion at
1471     * tail, so save the last element in last_block.
1472     */
1473    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1474        last_block = block;
1475        if (block->max_length < new_block->max_length) {
1476            break;
1477        }
1478    }
1479    if (block) {
1480        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1481    } else if (last_block) {
1482        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1483    } else { /* list is empty */
1484        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1485    }
1486    ram_list.mru_block = NULL;
1487
1488    /* Write list before version */
1489    smp_wmb();
1490    ram_list.version++;
1491    qemu_mutex_unlock_ramlist();
1492
1493    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1494
1495    if (new_ram_size > old_ram_size) {
1496        int i;
1497
1498        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
1499        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1500            ram_list.dirty_memory[i] =
1501                bitmap_zero_extend(ram_list.dirty_memory[i],
1502                                   old_ram_size, new_ram_size);
 1503        }
1504    }
1505    cpu_physical_memory_set_dirty_range(new_block->offset,
1506                                        new_block->used_length,
1507                                        DIRTY_CLIENTS_ALL);
1508
1509    if (new_block->host) {
1510        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1511        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1512        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1513        if (kvm_enabled()) {
1514            kvm_setup_guest_memory(new_block->host, new_block->max_length);
1515        }
1516    }
1517
1518    return new_block->offset;
1519}
1520
1521#ifdef __linux__
1522ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1523                                    bool share, const char *mem_path,
1524                                    Error **errp)
1525{
1526    RAMBlock *new_block;
1527    ram_addr_t addr;
1528    Error *local_err = NULL;
1529
1530    if (xen_enabled()) {
1531        error_setg(errp, "-mem-path not supported with Xen");
1532        return -1;
1533    }
1534
1535    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1536        /*
1537         * file_ram_alloc() needs to allocate just like
1538         * phys_mem_alloc, but we haven't bothered to provide
1539         * a hook there.
1540         */
1541        error_setg(errp,
1542                   "-mem-path not supported with this accelerator");
1543        return -1;
1544    }
1545
1546    size = TARGET_PAGE_ALIGN(size);
1547    new_block = g_malloc0(sizeof(*new_block));
1548    new_block->mr = mr;
1549    new_block->used_length = size;
1550    new_block->max_length = size;
1551    new_block->flags = share ? RAM_SHARED : 0;
1552    new_block->host = file_ram_alloc(new_block, size,
1553                                     mem_path, errp);
1554    if (!new_block->host) {
1555        g_free(new_block);
1556        return -1;
1557    }
1558
1559    addr = ram_block_add(new_block, &local_err);
1560    if (local_err) {
1561        g_free(new_block);
1562        error_propagate(errp, local_err);
1563        return -1;
1564    }
1565    return addr;
1566}
1567#endif
1568
1569static
1570ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1571                                   void (*resized)(const char*,
1572                                                   uint64_t length,
1573                                                   void *host),
1574                                   void *host, bool resizeable,
1575                                   MemoryRegion *mr, Error **errp)
1576{
1577    RAMBlock *new_block;
1578    ram_addr_t addr;
1579    Error *local_err = NULL;
1580
1581    size = TARGET_PAGE_ALIGN(size);
1582    max_size = TARGET_PAGE_ALIGN(max_size);
1583    new_block = g_malloc0(sizeof(*new_block));
1584    new_block->mr = mr;
1585    new_block->resized = resized;
1586    new_block->used_length = size;
1587    new_block->max_length = max_size;
1588    assert(max_size >= size);
1589    new_block->fd = -1;
1590    new_block->host = host;
1591    if (host) {
1592        new_block->flags |= RAM_PREALLOC;
1593    }
1594    if (resizeable) {
1595        new_block->flags |= RAM_RESIZEABLE;
1596    }
1597    addr = ram_block_add(new_block, &local_err);
1598    if (local_err) {
1599        g_free(new_block);
1600        error_propagate(errp, local_err);
1601        return -1;
1602    }
1603    return addr;
1604}
1605
1606ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1607                                   MemoryRegion *mr, Error **errp)
1608{
1609    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1610}
1611
1612ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1613{
1614    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1615}
1616
1617ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1618                                     void (*resized)(const char*,
1619                                                     uint64_t length,
1620                                                     void *host),
1621                                     MemoryRegion *mr, Error **errp)
1622{
1623    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1624}
1625
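/*
 * Minimal usage sketch for the resizeable variant; the MemoryRegion "mr"
 * and the 128/192/256 MiB figures are illustrative assumptions only:
 *
 *     Error *err = NULL;
 *     ram_addr_t base = qemu_ram_alloc_resizeable(128 * 1024 * 1024,
 *                                                 256 * 1024 * 1024,
 *                                                 NULL, mr, &err);
 *     ...
 *     qemu_ram_resize(base, 192 * 1024 * 1024, &err);
 *
 * The new size must stay within max_length, and resizing is only legal
 * before the guest could have observed the size (see the comment above
 * qemu_ram_resize()).
 */
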
1626void qemu_ram_free_from_ptr(ram_addr_t addr)
1627{
1628    RAMBlock *block;
1629
1630    qemu_mutex_lock_ramlist();
1631    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1632        if (addr == block->offset) {
1633            QLIST_REMOVE_RCU(block, next);
1634            ram_list.mru_block = NULL;
1635            /* Write list before version */
1636            smp_wmb();
1637            ram_list.version++;
1638            g_free_rcu(block, rcu);
1639            break;
1640        }
1641    }
1642    qemu_mutex_unlock_ramlist();
1643}
1644
1645static void reclaim_ramblock(RAMBlock *block)
1646{
1647    if (block->flags & RAM_PREALLOC) {
1648        ;
1649    } else if (xen_enabled()) {
1650        xen_invalidate_map_cache_entry(block->host);
1651#ifndef _WIN32
1652    } else if (block->fd >= 0) {
1653        munmap(block->host, block->max_length);
1654        close(block->fd);
1655#endif
1656    } else {
1657        qemu_anon_ram_free(block->host, block->max_length);
1658    }
1659    g_free(block);
1660}
1661
1662void qemu_ram_free(ram_addr_t addr)
1663{
1664    RAMBlock *block;
1665
1666    qemu_mutex_lock_ramlist();
1667    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1668        if (addr == block->offset) {
1669            QLIST_REMOVE_RCU(block, next);
1670            ram_list.mru_block = NULL;
1671            /* Write list before version */
1672            smp_wmb();
1673            ram_list.version++;
1674            call_rcu(block, reclaim_ramblock, rcu);
1675            break;
1676        }
1677    }
1678    qemu_mutex_unlock_ramlist();
1679}
1680
1681#ifndef _WIN32
1682void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1683{
1684    RAMBlock *block;
1685    ram_addr_t offset;
1686    int flags;
1687    void *area, *vaddr;
1688
1689    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1690        offset = addr - block->offset;
1691        if (offset < block->max_length) {
1692            vaddr = ramblock_ptr(block, offset);
1693            if (block->flags & RAM_PREALLOC) {
1694                ;
1695            } else if (xen_enabled()) {
1696                abort();
1697            } else {
1698                flags = MAP_FIXED;
1699                if (block->fd >= 0) {
1700                    flags |= (block->flags & RAM_SHARED ?
1701                              MAP_SHARED : MAP_PRIVATE);
1702                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1703                                flags, block->fd, offset);
1704                } else {
1705                    /*
1706                     * Remap needs to match alloc.  Accelerators that
1707                     * set phys_mem_alloc never remap.  If they did,
1708                     * we'd need a remap hook here.
1709                     */
1710                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1711
1712                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1713                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1714                                flags, -1, 0);
1715                }
1716                if (area != vaddr) {
1717                    fprintf(stderr, "Could not remap addr: "
1718                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1719                            length, addr);
1720                    exit(1);
1721                }
1722                memory_try_enable_merging(vaddr, length);
1723                qemu_ram_setup_dump(vaddr, length);
1724            }
1725        }
1726    }
1727}
1728#endif /* !_WIN32 */
1729
1730int qemu_get_ram_fd(ram_addr_t addr)
1731{
1732    RAMBlock *block;
1733    int fd;
1734
1735    rcu_read_lock();
1736    block = qemu_get_ram_block(addr);
1737    fd = block->fd;
1738    rcu_read_unlock();
1739    return fd;
1740}
1741
1742void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1743{
1744    RAMBlock *block;
1745    void *ptr;
1746
1747    rcu_read_lock();
1748    block = qemu_get_ram_block(addr);
1749    ptr = ramblock_ptr(block, 0);
1750    rcu_read_unlock();
1751    return ptr;
1752}
1753
1754/* Return a host pointer to ram allocated with qemu_ram_alloc.
1755 * This should not be used for general purpose DMA.  Use address_space_map
1756 * or address_space_rw instead. For local memory (e.g. video ram) that the
1757 * device owns, use memory_region_get_ram_ptr.
1758 *
1759 * By the time this function returns, the returned pointer is not protected
1760 * by RCU anymore.  If the caller is not within an RCU critical section and
1761 * does not hold the iothread lock, it must have other means of protecting the
1762 * pointer, such as a reference to the region that includes the incoming
1763 * ram_addr_t.
1764 */
1765void *qemu_get_ram_ptr(ram_addr_t addr)
1766{
1767    RAMBlock *block;
1768    void *ptr;
1769
1770    rcu_read_lock();
1771    block = qemu_get_ram_block(addr);
1772
1773    if (xen_enabled() && block->host == NULL) {
1774        /* We need to check if the requested address is in the RAM
1775         * because we don't want to map the entire memory in QEMU.
1776         * In that case just map until the end of the page.
1777         */
1778        if (block->offset == 0) {
1779            ptr = xen_map_cache(addr, 0, 0);
1780            goto unlock;
1781        }
1782
1783        block->host = xen_map_cache(block->offset, block->max_length, 1);
1784    }
1785    ptr = ramblock_ptr(block, addr - block->offset);
1786
1787unlock:
1788    rcu_read_unlock();
1789    return ptr;
1790}
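
/*
 * Illustrative sketch of the locking contract described above: a caller that
 * is not already inside an RCU critical section (and does not hold the
 * iothread lock) wraps the access in rcu_read_lock()/rcu_read_unlock() so the
 * returned pointer stays valid for the duration of the access.  The
 * example_* name is hypothetical.
 *
 *     static uint8_t example_peek_ram_byte(ram_addr_t addr)
 *     {
 *         uint8_t val;
 *
 *         rcu_read_lock();
 *         val = ldub_p(qemu_get_ram_ptr(addr));
 *         rcu_read_unlock();
 *         return val;
 *     }
 */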
1791
1792/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1793 * but takes a size argument.
1794 *
1795 * By the time this function returns, the returned pointer is not protected
1796 * by RCU anymore.  If the caller is not within an RCU critical section and
1797 * does not hold the iothread lock, it must have other means of protecting the
1798 * pointer, such as a reference to the region that includes the incoming
1799 * ram_addr_t.
1800 */
1801static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1802{
1803    void *ptr;
1804    if (*size == 0) {
1805        return NULL;
1806    }
1807    if (xen_enabled()) {
1808        return xen_map_cache(addr, *size, 1);
1809    } else {
1810        RAMBlock *block;
1811        rcu_read_lock();
1812        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1813            if (addr - block->offset < block->max_length) {
1814                if (addr - block->offset + *size > block->max_length)
1815                    *size = block->max_length - addr + block->offset;
1816                ptr = ramblock_ptr(block, addr - block->offset);
1817                rcu_read_unlock();
1818                return ptr;
1819            }
1820        }
1821
1822        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1823        abort();
1824    }
1825}
1826
1827/* Some of the softmmu routines need to translate from a host pointer
1828 * (typically a TLB entry) back to a ram offset.
1829 *
1830 * By the time this function returns, the returned pointer is not protected
1831 * by RCU anymore.  If the caller is not within an RCU critical section and
1832 * does not hold the iothread lock, it must have other means of protecting the
1833 * pointer, such as a reference to the region that includes the incoming
1834 * ram_addr_t.
1835 */
1836MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1837{
1838    RAMBlock *block;
1839    uint8_t *host = ptr;
1840    MemoryRegion *mr;
1841
1842    if (xen_enabled()) {
1843        rcu_read_lock();
1844        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1845        mr = qemu_get_ram_block(*ram_addr)->mr;
1846        rcu_read_unlock();
1847        return mr;
1848    }
1849
1850    rcu_read_lock();
1851    block = atomic_rcu_read(&ram_list.mru_block);
1852    if (block && block->host && host - block->host < block->max_length) {
1853        goto found;
1854    }
1855
1856    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1857        /* This case can happen when the block is not mapped. */
1858        if (block->host == NULL) {
1859            continue;
1860        }
1861        if (host - block->host < block->max_length) {
1862            goto found;
1863        }
1864    }
1865
1866    rcu_read_unlock();
1867    return NULL;
1868
1869found:
1870    *ram_addr = block->offset + (host - block->host);
1871    mr = block->mr;
1872    rcu_read_unlock();
1873    return mr;
1874}
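
/*
 * Illustrative sketch: translating a host pointer (e.g. one taken from a TLB
 * entry) back to its ram_addr_t.  A NULL MemoryRegion means the pointer does
 * not belong to guest RAM.  The example_* name is hypothetical; the caller is
 * assumed to satisfy the RCU caveat documented above.
 *
 *     static bool example_host_to_ram_addr(void *host, ram_addr_t *out)
 *     {
 *         MemoryRegion *mr = qemu_ram_addr_from_host(host, out);
 *
 *         return mr != NULL;
 *     }
 */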
1875
1876static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1877                               uint64_t val, unsigned size)
1878{
1879    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1880        tb_invalidate_phys_page_fast(ram_addr, size);
1881    }
1882    switch (size) {
1883    case 1:
1884        stb_p(qemu_get_ram_ptr(ram_addr), val);
1885        break;
1886    case 2:
1887        stw_p(qemu_get_ram_ptr(ram_addr), val);
1888        break;
1889    case 4:
1890        stl_p(qemu_get_ram_ptr(ram_addr), val);
1891        break;
1892    default:
1893        abort();
1894    }
1895    /* Set both VGA and migration bits for simplicity and to remove
1896     * the notdirty callback faster.
1897     */
1898    cpu_physical_memory_set_dirty_range(ram_addr, size,
1899                                        DIRTY_CLIENTS_NOCODE);
1900    /* we remove the notdirty callback only if the code has been
1901       flushed */
1902    if (!cpu_physical_memory_is_clean(ram_addr)) {
1903        CPUArchState *env = current_cpu->env_ptr;
1904        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1905    }
1906}
1907
1908static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1909                                 unsigned size, bool is_write)
1910{
1911    return is_write;
1912}
1913
1914static const MemoryRegionOps notdirty_mem_ops = {
1915    .write = notdirty_mem_write,
1916    .valid.accepts = notdirty_mem_accepts,
1917    .endianness = DEVICE_NATIVE_ENDIAN,
1918};
1919
1920/* Generate a debug exception if a watchpoint has been hit.  */
1921static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1922{
1923    CPUState *cpu = current_cpu;
1924    CPUArchState *env = cpu->env_ptr;
1925    target_ulong pc, cs_base;
1926    target_ulong vaddr;
1927    CPUWatchpoint *wp;
1928    int cpu_flags;
1929
1930    if (cpu->watchpoint_hit) {
1931        /* We re-entered the check after replacing the TB. Now raise
1932         * the debug interrupt so that it will trigger after the
1933         * current instruction. */
1934        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1935        return;
1936    }
1937    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1938    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1939        if (cpu_watchpoint_address_matches(wp, vaddr, len)
1940            && (wp->flags & flags)) {
1941            if (flags == BP_MEM_READ) {
1942                wp->flags |= BP_WATCHPOINT_HIT_READ;
1943            } else {
1944                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1945            }
1946            wp->hitaddr = vaddr;
1947            wp->hitattrs = attrs;
1948            if (!cpu->watchpoint_hit) {
1949                cpu->watchpoint_hit = wp;
1950                tb_check_watchpoint(cpu);
1951                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1952                    cpu->exception_index = EXCP_DEBUG;
1953                    cpu_loop_exit(cpu);
1954                } else {
1955                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1956                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1957                    cpu_resume_from_signal(cpu, NULL);
1958                }
1959            }
1960        } else {
1961            wp->flags &= ~BP_WATCHPOINT_HIT;
1962        }
1963    }
1964}
1965
1966/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1967   so these check for a hit then pass through to the normal out-of-line
1968   phys routines.  */
1969static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1970                                  unsigned size, MemTxAttrs attrs)
1971{
1972    MemTxResult res;
1973    uint64_t data;
1974
1975    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1976    switch (size) {
1977    case 1:
1978        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1979        break;
1980    case 2:
1981        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1982        break;
1983    case 4:
1984        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1985        break;
1986    default: abort();
1987    }
1988    *pdata = data;
1989    return res;
1990}
1991
1992static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1993                                   uint64_t val, unsigned size,
1994                                   MemTxAttrs attrs)
1995{
1996    MemTxResult res;
1997
1998    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1999    switch (size) {
2000    case 1:
2001        address_space_stb(&address_space_memory, addr, val, attrs, &res);
2002        break;
2003    case 2:
2004        address_space_stw(&address_space_memory, addr, val, attrs, &res);
2005        break;
2006    case 4:
2007        address_space_stl(&address_space_memory, addr, val, attrs, &res);
2008        break;
2009    default: abort();
2010    }
2011    return res;
2012}
2013
2014static const MemoryRegionOps watch_mem_ops = {
2015    .read_with_attrs = watch_mem_read,
2016    .write_with_attrs = watch_mem_write,
2017    .endianness = DEVICE_NATIVE_ENDIAN,
2018};
2019
2020static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2021                                unsigned len, MemTxAttrs attrs)
2022{
2023    subpage_t *subpage = opaque;
2024    uint8_t buf[8];
2025    MemTxResult res;
2026
2027#if defined(DEBUG_SUBPAGE)
2028    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2029           subpage, len, addr);
2030#endif
2031    res = address_space_read(subpage->as, addr + subpage->base,
2032                             attrs, buf, len);
2033    if (res) {
2034        return res;
2035    }
2036    switch (len) {
2037    case 1:
2038        *data = ldub_p(buf);
2039        return MEMTX_OK;
2040    case 2:
2041        *data = lduw_p(buf);
2042        return MEMTX_OK;
2043    case 4:
2044        *data = ldl_p(buf);
2045        return MEMTX_OK;
2046    case 8:
2047        *data = ldq_p(buf);
2048        return MEMTX_OK;
2049    default:
2050        abort();
2051    }
2052}
2053
2054static MemTxResult subpage_write(void *opaque, hwaddr addr,
2055                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2056{
2057    subpage_t *subpage = opaque;
2058    uint8_t buf[8];
2059
2060#if defined(DEBUG_SUBPAGE)
2061    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2062           " value %"PRIx64"\n",
2063           __func__, subpage, len, addr, value);
2064#endif
2065    switch (len) {
2066    case 1:
2067        stb_p(buf, value);
2068        break;
2069    case 2:
2070        stw_p(buf, value);
2071        break;
2072    case 4:
2073        stl_p(buf, value);
2074        break;
2075    case 8:
2076        stq_p(buf, value);
2077        break;
2078    default:
2079        abort();
2080    }
2081    return address_space_write(subpage->as, addr + subpage->base,
2082                               attrs, buf, len);
2083}
2084
2085static bool subpage_accepts(void *opaque, hwaddr addr,
2086                            unsigned len, bool is_write)
2087{
2088    subpage_t *subpage = opaque;
2089#if defined(DEBUG_SUBPAGE)
2090    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2091           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2092#endif
2093
2094    return address_space_access_valid(subpage->as, addr + subpage->base,
2095                                      len, is_write);
2096}
2097
2098static const MemoryRegionOps subpage_ops = {
2099    .read_with_attrs = subpage_read,
2100    .write_with_attrs = subpage_write,
2101    .impl.min_access_size = 1,
2102    .impl.max_access_size = 8,
2103    .valid.min_access_size = 1,
2104    .valid.max_access_size = 8,
2105    .valid.accepts = subpage_accepts,
2106    .endianness = DEVICE_NATIVE_ENDIAN,
2107};
2108
2109static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2110                             uint16_t section)
2111{
2112    int idx, eidx;
2113
2114    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2115        return -1;
2116    idx = SUBPAGE_IDX(start);
2117    eidx = SUBPAGE_IDX(end);
2118#if defined(DEBUG_SUBPAGE)
2119    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2120           __func__, mmio, start, end, idx, eidx, section);
2121#endif
2122    for (; idx <= eidx; idx++) {
2123        mmio->sub_section[idx] = section;
2124    }
2125
2126    return 0;
2127}
2128
2129static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2130{
2131    subpage_t *mmio;
2132
2133    mmio = g_malloc0(sizeof(subpage_t));
2134
2135    mmio->as = as;
2136    mmio->base = base;
2137    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2138                          NULL, TARGET_PAGE_SIZE);
2139    mmio->iomem.subpage = true;
2140#if defined(DEBUG_SUBPAGE)
2141    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2142           mmio, base, TARGET_PAGE_SIZE);
2143#endif
2144    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2145
2146    return mmio;
2147}
2148
2149static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2150                              MemoryRegion *mr)
2151{
2152    assert(as);
2153    MemoryRegionSection section = {
2154        .address_space = as,
2155        .mr = mr,
2156        .offset_within_address_space = 0,
2157        .offset_within_region = 0,
2158        .size = int128_2_64(),
2159    };
2160
2161    return phys_section_add(map, &section);
2162}
2163
2164MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2165{
2166    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2167    MemoryRegionSection *sections = d->map.sections;
2168
2169    return sections[index & ~TARGET_PAGE_MASK].mr;
2170}
2171
2172static void io_mem_init(void)
2173{
2174    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2175    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2176                          NULL, UINT64_MAX);
2177    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2178                          NULL, UINT64_MAX);
2179    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2180                          NULL, UINT64_MAX);
2181}
2182
2183static void mem_begin(MemoryListener *listener)
2184{
2185    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2186    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2187    uint16_t n;
2188
2189    n = dummy_section(&d->map, as, &io_mem_unassigned);
2190    assert(n == PHYS_SECTION_UNASSIGNED);
2191    n = dummy_section(&d->map, as, &io_mem_notdirty);
2192    assert(n == PHYS_SECTION_NOTDIRTY);
2193    n = dummy_section(&d->map, as, &io_mem_rom);
2194    assert(n == PHYS_SECTION_ROM);
2195    n = dummy_section(&d->map, as, &io_mem_watch);
2196    assert(n == PHYS_SECTION_WATCH);
2197
2198    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2199    d->as = as;
2200    as->next_dispatch = d;
2201}
2202
2203static void address_space_dispatch_free(AddressSpaceDispatch *d)
2204{
2205    phys_sections_free(&d->map);
2206    g_free(d);
2207}
2208
2209static void mem_commit(MemoryListener *listener)
2210{
2211    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2212    AddressSpaceDispatch *cur = as->dispatch;
2213    AddressSpaceDispatch *next = as->next_dispatch;
2214
2215    phys_page_compact_all(next, next->map.nodes_nb);
2216
2217    atomic_rcu_set(&as->dispatch, next);
2218    if (cur) {
2219        call_rcu(cur, address_space_dispatch_free, rcu);
2220    }
2221}
2222
2223static void tcg_commit(MemoryListener *listener)
2224{
2225    CPUState *cpu;
2226
2227    /* since each CPU stores ram addresses in its TLB cache, we must
2228       reset the modified entries */
2229    /* XXX: slow ! */
2230    CPU_FOREACH(cpu) {
2231        /* FIXME: Disentangle the cpu.h circular files deps so we can
2232           directly get the right CPU from listener.  */
2233        if (cpu->tcg_as_listener != listener) {
2234            continue;
2235        }
2236        cpu_reload_memory_map(cpu);
2237    }
2238}
2239
2240void address_space_init_dispatch(AddressSpace *as)
2241{
2242    as->dispatch = NULL;
2243    as->dispatch_listener = (MemoryListener) {
2244        .begin = mem_begin,
2245        .commit = mem_commit,
2246        .region_add = mem_add,
2247        .region_nop = mem_add,
2248        .priority = 0,
2249    };
2250    memory_listener_register(&as->dispatch_listener, as);
2251}
2252
2253void address_space_unregister(AddressSpace *as)
2254{
2255    memory_listener_unregister(&as->dispatch_listener);
2256}
2257
2258void address_space_destroy_dispatch(AddressSpace *as)
2259{
2260    AddressSpaceDispatch *d = as->dispatch;
2261
2262    atomic_rcu_set(&as->dispatch, NULL);
2263    if (d) {
2264        call_rcu(d, address_space_dispatch_free, rcu);
2265    }
2266}
2267
2268static void memory_map_init(void)
2269{
2270    system_memory = g_malloc(sizeof(*system_memory));
2271
2272    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2273    address_space_init(&address_space_memory, system_memory, "memory");
2274
2275    system_io = g_malloc(sizeof(*system_io));
2276    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2277                          65536);
2278    address_space_init(&address_space_io, system_io, "I/O");
2279}
2280
2281MemoryRegion *get_system_memory(void)
2282{
2283    return system_memory;
2284}
2285
2286MemoryRegion *get_system_io(void)
2287{
2288    return system_io;
2289}
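
/*
 * Illustrative sketch of how board code typically plugs RAM into the address
 * space rooted at get_system_memory().  This assumes the
 * memory_region_init_ram() signature with an Error ** argument and the
 * availability of &error_abort; the example_* names and the offset are
 * hypothetical.
 *
 *     static void example_add_board_ram(uint64_t size)
 *     {
 *         MemoryRegion *ram = g_new(MemoryRegion, 1);
 *
 *         memory_region_init_ram(ram, NULL, "example.ram", size,
 *                                &error_abort);
 *         memory_region_add_subregion(get_system_memory(), 0, ram);
 *     }
 */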
2290
2291#endif /* !defined(CONFIG_USER_ONLY) */
2292
2293/* physical memory access (slow version, mainly for debug) */
2294#if defined(CONFIG_USER_ONLY)
2295int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2296                        uint8_t *buf, int len, int is_write)
2297{
2298    int l, flags;
2299    target_ulong page;
2300    void * p;
2301
2302    while (len > 0) {
2303        page = addr & TARGET_PAGE_MASK;
2304        l = (page + TARGET_PAGE_SIZE) - addr;
2305        if (l > len)
2306            l = len;
2307        flags = page_get_flags(page);
2308        if (!(flags & PAGE_VALID))
2309            return -1;
2310        if (is_write) {
2311            if (!(flags & PAGE_WRITE))
2312                return -1;
2313            /* XXX: this code should not depend on lock_user */
2314            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2315                return -1;
2316            memcpy(p, buf, l);
2317            unlock_user(p, addr, l);
2318        } else {
2319            if (!(flags & PAGE_READ))
2320                return -1;
2321            /* XXX: this code should not depend on lock_user */
2322            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2323                return -1;
2324            memcpy(buf, p, l);
2325            unlock_user(p, addr, 0);
2326        }
2327        len -= l;
2328        buf += l;
2329        addr += l;
2330    }
2331    return 0;
2332}
2333
2334#else
2335
2336static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2337                                     hwaddr length)
2338{
2339    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2340    /* No early return if dirty_log_mask is or becomes 0, because
2341     * cpu_physical_memory_set_dirty_range will still call
2342     * xen_modified_memory.
2343     */
2344    if (dirty_log_mask) {
2345        dirty_log_mask =
2346            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2347    }
2348    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2349        tb_invalidate_phys_range(addr, addr + length);
2350        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2351    }
2352    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2353}
2354
2355static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2356{
2357    unsigned access_size_max = mr->ops->valid.max_access_size;
2358
2359    /* Regions are assumed to support 1-4 byte accesses unless
2360       otherwise specified.  */
2361    if (access_size_max == 0) {
2362        access_size_max = 4;
2363    }
2364
2365    /* Bound the maximum access by the alignment of the address.  */
2366    if (!mr->ops->impl.unaligned) {
2367        unsigned align_size_max = addr & -addr;
2368        if (align_size_max != 0 && align_size_max < access_size_max) {
2369            access_size_max = align_size_max;
2370        }
2371    }
2372
2373    /* Don't attempt accesses larger than the maximum.  */
2374    if (l > access_size_max) {
2375        l = access_size_max;
2376    }
2377    if (l & (l - 1)) {
2378        l = 1 << (qemu_fls(l) - 1);
2379    }
2380
2381    return l;
2382}
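
/*
 * Worked example for the clamping above: a 6-byte request at an address
 * whose lowest set bit is 2 (addr & -addr == 2), on a region that does not
 * allow unaligned accesses, is clamped to 2 bytes; the same 6-byte request
 * at an 8-byte-aligned address is clamped to 4 bytes (the default maximum
 * access size, then rounded down to a power of two).
 */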
2383
2384static bool prepare_mmio_access(MemoryRegion *mr)
2385{
2386    bool unlocked = !qemu_mutex_iothread_locked();
2387    bool release_lock = false;
2388
2389    if (unlocked && mr->global_locking) {
2390        qemu_mutex_lock_iothread();
2391        unlocked = false;
2392        release_lock = true;
2393    }
2394    if (mr->flush_coalesced_mmio) {
2395        if (unlocked) {
2396            qemu_mutex_lock_iothread();
2397        }
2398        qemu_flush_coalesced_mmio_buffer();
2399        if (unlocked) {
2400            qemu_mutex_unlock_iothread();
2401        }
2402    }
2403
2404    return release_lock;
2405}
2406
2407MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2408                             uint8_t *buf, int len, bool is_write)
2409{
2410    hwaddr l;
2411    uint8_t *ptr;
2412    uint64_t val;
2413    hwaddr addr1;
2414    MemoryRegion *mr;
2415    MemTxResult result = MEMTX_OK;
2416    bool release_lock = false;
2417
2418    rcu_read_lock();
2419    while (len > 0) {
2420        l = len;
2421        mr = address_space_translate(as, addr, &addr1, &l, is_write);
2422
2423        if (is_write) {
2424            if (!memory_access_is_direct(mr, is_write)) {
2425                release_lock |= prepare_mmio_access(mr);
2426                l = memory_access_size(mr, l, addr1);
2427                /* XXX: could force current_cpu to NULL to avoid
2428                   potential bugs */
2429                switch (l) {
2430                case 8:
2431                    /* 64 bit write access */
2432                    val = ldq_p(buf);
2433                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
2434                                                           attrs);
2435                    break;
2436                case 4:
2437                    /* 32 bit write access */
2438                    val = ldl_p(buf);
2439                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
2440                                                           attrs);
2441                    break;
2442                case 2:
2443                    /* 16 bit write access */
2444                    val = lduw_p(buf);
2445                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
2446                                                           attrs);
2447                    break;
2448                case 1:
2449                    /* 8 bit write access */
2450                    val = ldub_p(buf);
2451                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
2452                                                           attrs);
2453                    break;
2454                default:
2455                    abort();
2456                }
2457            } else {
2458                addr1 += memory_region_get_ram_addr(mr);
2459                /* RAM case */
2460                ptr = qemu_get_ram_ptr(addr1);
2461                memcpy(ptr, buf, l);
2462                invalidate_and_set_dirty(mr, addr1, l);
2463            }
2464        } else {
2465            if (!memory_access_is_direct(mr, is_write)) {
2466                /* I/O case */
2467                release_lock |= prepare_mmio_access(mr);
2468                l = memory_access_size(mr, l, addr1);
2469                switch (l) {
2470                case 8:
2471                    /* 64 bit read access */
2472                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2473                                                          attrs);
2474                    stq_p(buf, val);
2475                    break;
2476                case 4:
2477                    /* 32 bit read access */
2478                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2479                                                          attrs);
2480                    stl_p(buf, val);
2481                    break;
2482                case 2:
2483                    /* 16 bit read access */
2484                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2485                                                          attrs);
2486                    stw_p(buf, val);
2487                    break;
2488                case 1:
2489                    /* 8 bit read access */
2490                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2491                                                          attrs);
2492                    stb_p(buf, val);
2493                    break;
2494                default:
2495                    abort();
2496                }
2497            } else {
2498                /* RAM case */
2499                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2500                memcpy(buf, ptr, l);
2501            }
2502        }
2503
2504        if (release_lock) {
2505            qemu_mutex_unlock_iothread();
2506            release_lock = false;
2507        }
2508
2509        len -= l;
2510        buf += l;
2511        addr += l;
2512    }
2513    rcu_read_unlock();
2514
2515    return result;
2516}
2517
2518MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2519                                const uint8_t *buf, int len)
2520{
2521    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2522}
2523
2524MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2525                               uint8_t *buf, int len)
2526{
2527    return address_space_rw(as, addr, attrs, buf, len, false);
2528}
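
/*
 * Illustrative sketch of the usual calling pattern for device or helper
 * code: bulk-copy from a guest-physical buffer and check the combined
 * MemTxResult instead of ignoring it.  The example_* name is hypothetical.
 *
 *     static bool example_read_guest_buf(hwaddr gpa, uint8_t *buf, int len)
 *     {
 *         MemTxResult r;
 *
 *         r = address_space_read(&address_space_memory, gpa,
 *                                MEMTXATTRS_UNSPECIFIED, buf, len);
 *         return r == MEMTX_OK;
 *     }
 */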
2529
2530
2531void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2532                            int len, int is_write)
2533{
2534    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2535                     buf, len, is_write);
2536}
2537
2538enum write_rom_type {
2539    WRITE_DATA,
2540    FLUSH_CACHE,
2541};
2542
2543static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2544    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2545{
2546    hwaddr l;
2547    uint8_t *ptr;
2548    hwaddr addr1;
2549    MemoryRegion *mr;
2550
2551    rcu_read_lock();
2552    while (len > 0) {
2553        l = len;
2554        mr = address_space_translate(as, addr, &addr1, &l, true);
2555
2556        if (!(memory_region_is_ram(mr) ||
2557              memory_region_is_romd(mr))) {
2558            l = memory_access_size(mr, l, addr1);
2559        } else {
2560            addr1 += memory_region_get_ram_addr(mr);
2561            /* ROM/RAM case */
2562            ptr = qemu_get_ram_ptr(addr1);
2563            switch (type) {
2564            case WRITE_DATA:
2565                memcpy(ptr, buf, l);
2566                invalidate_and_set_dirty(mr, addr1, l);
2567                break;
2568            case FLUSH_CACHE:
2569                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2570                break;
2571            }
2572        }
2573        len -= l;
2574        buf += l;
2575        addr += l;
2576    }
2577    rcu_read_unlock();
2578}
2579
2580/* used for ROM loading: can write in RAM and ROM */
2581void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2582                                   const uint8_t *buf, int len)
2583{
2584    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2585}
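
/*
 * Illustrative sketch: a ROM loader writing a firmware blob into a region
 * that may be ROM, which a plain address_space_write() would leave untouched.
 * The example_* name and the 0xfffc0000 address are hypothetical.
 *
 *     static void example_load_rom(const uint8_t *blob, int blob_size)
 *     {
 *         cpu_physical_memory_write_rom(&address_space_memory, 0xfffc0000,
 *                                       blob, blob_size);
 *     }
 */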
2586
2587void cpu_flush_icache_range(hwaddr start, int len)
2588{
2589    /*
2590     * This function should do the same thing as an icache flush that was
2591     * triggered from within the guest. For TCG we are always cache coherent,
2592     * so there is no need to flush anything. For KVM / Xen we need to flush
2593     * the host's instruction cache at least.
2594     */
2595    if (tcg_enabled()) {
2596        return;
2597    }
2598
2599    cpu_physical_memory_write_rom_internal(&address_space_memory,
2600                                           start, NULL, len, FLUSH_CACHE);
2601}
2602
2603typedef struct {
2604    MemoryRegion *mr;
2605    void *buffer;
2606    hwaddr addr;
2607    hwaddr len;
2608    bool in_use;
2609} BounceBuffer;
2610
2611static BounceBuffer bounce;
2612
2613typedef struct MapClient {
2614    QEMUBH *bh;
2615    QLIST_ENTRY(MapClient) link;
2616} MapClient;
2617
2618QemuMutex map_client_list_lock;
2619static QLIST_HEAD(map_client_list, MapClient) map_client_list
2620    = QLIST_HEAD_INITIALIZER(map_client_list);
2621
2622static void cpu_unregister_map_client_do(MapClient *client)
2623{
2624    QLIST_REMOVE(client, link);
2625    g_free(client);
2626}
2627
2628static void cpu_notify_map_clients_locked(void)
2629{
2630    MapClient *client;
2631
2632    while (!QLIST_EMPTY(&map_client_list)) {
2633        client = QLIST_FIRST(&map_client_list);
2634        qemu_bh_schedule(client->bh);
2635        cpu_unregister_map_client_do(client);
2636    }
2637}
2638
2639void cpu_register_map_client(QEMUBH *bh)
2640{
2641    MapClient *client = g_malloc(sizeof(*client));
2642
2643    qemu_mutex_lock(&map_client_list_lock);
2644    client->bh = bh;
2645    QLIST_INSERT_HEAD(&map_client_list, client, link);
2646    if (!atomic_read(&bounce.in_use)) {
2647        cpu_notify_map_clients_locked();
2648    }
2649    qemu_mutex_unlock(&map_client_list_lock);
2650}
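
/*
 * Illustrative sketch of the retry protocol described in the comment above
 * address_space_map() below: when the mapping fails because the single
 * bounce buffer is in use, the caller registers a bottom half that is
 * scheduled once the buffer is released, and retries from there.  The
 * ExampleReq type, its fields and the example_* names are hypothetical;
 * req->retry_bh is assumed to have been created once with qemu_bh_new().
 *
 *     static void example_start_dma(ExampleReq *req)
 *     {
 *         hwaddr len = req->len;
 *         void *p = address_space_map(&address_space_memory, req->gpa,
 *                                     &len, req->is_write);
 *
 *         if (!p) {
 *             cpu_register_map_client(req->retry_bh);
 *             return;
 *         }
 *         example_do_transfer(req, p, len);
 *         address_space_unmap(&address_space_memory, p, len,
 *                             req->is_write, len);
 *     }
 */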
2651
2652void cpu_exec_init_all(void)
2653{
2654    qemu_mutex_init(&ram_list.mutex);
2655    memory_map_init();
2656    io_mem_init();
2657    qemu_mutex_init(&map_client_list_lock);
2658}
2659
2660void cpu_unregister_map_client(QEMUBH *bh)
2661{
2662    MapClient *client;
2663
2664    qemu_mutex_lock(&map_client_list_lock);
2665    QLIST_FOREACH(client, &map_client_list, link) {
2666        if (client->bh == bh) {
2667            cpu_unregister_map_client_do(client);
2668            break;
2669        }
2670    }
2671    qemu_mutex_unlock(&map_client_list_lock);
2672}
2673
2674static void cpu_notify_map_clients(void)
2675{
2676    qemu_mutex_lock(&map_client_list_lock);
2677    cpu_notify_map_clients_locked();
2678    qemu_mutex_unlock(&map_client_list_lock);
2679}
2680
2681bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2682{
2683    MemoryRegion *mr;
2684    hwaddr l, xlat;
2685
2686    rcu_read_lock();
2687    while (len > 0) {
2688        l = len;
2689        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2690        if (!memory_access_is_direct(mr, is_write)) {
2691            l = memory_access_size(mr, l, addr);
2692            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                    rcu_read_unlock();
2693                return false;
2694            }
2695        }
2696
2697        len -= l;
2698        addr += l;
2699    }
2700    rcu_read_unlock();
2701    return true;
2702}
2703
2704/* Map a physical memory region into a host virtual address.
2705 * May map a subset of the requested range, given by and returned in *plen.
2706 * May return NULL if resources needed to perform the mapping are exhausted.
2707 * Use only for reads OR writes - not for read-modify-write operations.
2708 * Use cpu_register_map_client() to know when retrying the map operation is
2709 * likely to succeed.
2710 */
2711void *address_space_map(AddressSpace *as,
2712                        hwaddr addr,
2713                        hwaddr *plen,
2714                        bool is_write)
2715{
2716    hwaddr len = *plen;
2717    hwaddr done = 0;
2718    hwaddr l, xlat, base;
2719    MemoryRegion *mr, *this_mr;
2720    ram_addr_t raddr;
2721
2722    if (len == 0) {
2723        return NULL;
2724    }
2725
2726    l = len;
2727    rcu_read_lock();
2728    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2729
2730    if (!memory_access_is_direct(mr, is_write)) {
2731        if (atomic_xchg(&bounce.in_use, true)) {
2732            rcu_read_unlock();
2733            return NULL;
2734        }
2735        /* Avoid unbounded allocations */
2736        l = MIN(l, TARGET_PAGE_SIZE);
2737        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2738        bounce.addr = addr;
2739        bounce.len = l;
2740
2741        memory_region_ref(mr);
2742        bounce.mr = mr;
2743        if (!is_write) {
2744            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2745                               bounce.buffer, l);
2746        }
2747
2748        rcu_read_unlock();
2749        *plen = l;
2750        return bounce.buffer;
2751    }
2752
2753    base = xlat;
2754    raddr = memory_region_get_ram_addr(mr);
2755
2756    for (;;) {
2757        len -= l;
2758        addr += l;
2759        done += l;
2760        if (len == 0) {
2761            break;
2762        }
2763
2764        l = len;
2765        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2766        if (this_mr != mr || xlat != base + done) {
2767            break;
2768        }
2769    }
2770
2771    memory_region_ref(mr);
2772    rcu_read_unlock();
2773    *plen = done;
2774    return qemu_ram_ptr_length(raddr + base, plen);
2775}
2776
2777/* Unmaps a memory region previously mapped by address_space_map().
2778 * Will also mark the memory as dirty if is_write == 1.  access_len gives
2779 * the amount of memory that was actually read or written by the caller.
2780 */
2781void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2782                         int is_write, hwaddr access_len)
2783{
2784    if (buffer != bounce.buffer) {
2785        MemoryRegion *mr;
2786        ram_addr_t addr1;
2787
2788        mr = qemu_ram_addr_from_host(buffer, &addr1);
2789        assert(mr != NULL);
2790        if (is_write) {
2791            invalidate_and_set_dirty(mr, addr1, access_len);
2792        }
2793        if (xen_enabled()) {
2794            xen_invalidate_map_cache_entry(buffer);
2795        }
2796        memory_region_unref(mr);
2797        return;
2798    }
2799    if (is_write) {
2800        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2801                            bounce.buffer, access_len);
2802    }
2803    qemu_vfree(bounce.buffer);
2804    bounce.buffer = NULL;
2805    memory_region_unref(bounce.mr);
2806    atomic_mb_set(&bounce.in_use, false);
2807    cpu_notify_map_clients();
2808}
2809
2810void *cpu_physical_memory_map(hwaddr addr,
2811                              hwaddr *plen,
2812                              int is_write)
2813{
2814    return address_space_map(&address_space_memory, addr, plen, is_write);
2815}
2816
2817void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2818                               int is_write, hwaddr access_len)
2819{
2820    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2821}
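
/*
 * Illustrative map/copy/unmap sketch.  Note that the length handed back
 * through *plen may be smaller than requested, and that access_len tells
 * the unmap how much memory was really touched.  The example_* name is
 * hypothetical.
 *
 *     static void example_fill_guest(hwaddr gpa, uint8_t pattern, hwaddr len)
 *     {
 *         while (len) {
 *             hwaddr l = len;
 *             void *p = cpu_physical_memory_map(gpa, &l, 1);
 *
 *             if (!p) {
 *                 break;
 *             }
 *             memset(p, pattern, l);
 *             cpu_physical_memory_unmap(p, l, 1, l);
 *             gpa += l;
 *             len -= l;
 *         }
 *     }
 */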
2822
2823/* warning: addr must be aligned */
2824static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2825                                                  MemTxAttrs attrs,
2826                                                  MemTxResult *result,
2827                                                  enum device_endian endian)
2828{
2829    uint8_t *ptr;
2830    uint64_t val;
2831    MemoryRegion *mr;
2832    hwaddr l = 4;
2833    hwaddr addr1;
2834    MemTxResult r;
2835    bool release_lock = false;
2836
2837    rcu_read_lock();
2838    mr = address_space_translate(as, addr, &addr1, &l, false);
2839    if (l < 4 || !memory_access_is_direct(mr, false)) {
2840        release_lock |= prepare_mmio_access(mr);
2841
2842        /* I/O case */
2843        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2844#if defined(TARGET_WORDS_BIGENDIAN)
2845        if (endian == DEVICE_LITTLE_ENDIAN) {
2846            val = bswap32(val);
2847        }
2848#else
2849        if (endian == DEVICE_BIG_ENDIAN) {
2850            val = bswap32(val);
2851        }
2852#endif
2853    } else {
2854        /* RAM case */
2855        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2856                                & TARGET_PAGE_MASK)
2857                               + addr1);
2858        switch (endian) {
2859        case DEVICE_LITTLE_ENDIAN:
2860            val = ldl_le_p(ptr);
2861            break;
2862        case DEVICE_BIG_ENDIAN:
2863            val = ldl_be_p(ptr);
2864            break;
2865        default:
2866            val = ldl_p(ptr);
2867            break;
2868        }
2869        r = MEMTX_OK;
2870    }
2871    if (result) {
2872        *result = r;
2873    }
2874    if (release_lock) {
2875        qemu_mutex_unlock_iothread();
2876    }
2877    rcu_read_unlock();
2878    return val;
2879}
2880
2881uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2882                           MemTxAttrs attrs, MemTxResult *result)
2883{
2884    return address_space_ldl_internal(as, addr, attrs, result,
2885                                      DEVICE_NATIVE_ENDIAN);
2886}
2887
2888uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2889                              MemTxAttrs attrs, MemTxResult *result)
2890{
2891    return address_space_ldl_internal(as, addr, attrs, result,
2892                                      DEVICE_LITTLE_ENDIAN);
2893}
2894
2895uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2896                              MemTxAttrs attrs, MemTxResult *result)
2897{
2898    return address_space_ldl_internal(as, addr, attrs, result,
2899                                      DEVICE_BIG_ENDIAN);
2900}
2901
2902uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2903{
2904    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2905}
2906
2907uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2908{
2909    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2910}
2911
2912uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2913{
2914    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2915}
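
/*
 * Illustrative sketch: reading a little-endian 32-bit field from guest
 * physical memory (e.g. part of an in-memory descriptor) while checking the
 * transaction result, rather than using the legacy ldl_le_phys() wrapper
 * that discards it.  The example_* name is hypothetical.
 *
 *     static bool example_read_desc_word(hwaddr gpa, uint32_t *out)
 *     {
 *         MemTxResult r;
 *
 *         *out = address_space_ldl_le(&address_space_memory, gpa,
 *                                     MEMTXATTRS_UNSPECIFIED, &r);
 *         return r == MEMTX_OK;
 *     }
 */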
2916
2917/* warning: addr must be aligned */
2918static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2919                                                  MemTxAttrs attrs,
2920                                                  MemTxResult *result,
2921                                                  enum device_endian endian)
2922{
2923    uint8_t *ptr;
2924    uint64_t val;
2925    MemoryRegion *mr;
2926    hwaddr l = 8;
2927    hwaddr addr1;
2928    MemTxResult r;
2929    bool release_lock = false;
2930
2931    rcu_read_lock();
2932    mr = address_space_translate(as, addr, &addr1, &l,
2933                                 false);
2934    if (l < 8 || !memory_access_is_direct(mr, false)) {
2935        release_lock |= prepare_mmio_access(mr);
2936
2937        /* I/O case */
2938        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2939#if defined(TARGET_WORDS_BIGENDIAN)
2940        if (endian == DEVICE_LITTLE_ENDIAN) {
2941            val = bswap64(val);
2942        }
2943#else
2944        if (endian == DEVICE_BIG_ENDIAN) {
2945            val = bswap64(val);
2946        }
2947#endif
2948    } else {
2949        /* RAM case */
2950        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2951                                & TARGET_PAGE_MASK)
2952                               + addr1);
2953        switch (endian) {
2954        case DEVICE_LITTLE_ENDIAN:
2955            val = ldq_le_p(ptr);
2956            break;
2957        case DEVICE_BIG_ENDIAN:
2958            val = ldq_be_p(ptr);
2959            break;
2960        default:
2961            val = ldq_p(ptr);
2962            break;
2963        }
2964        r = MEMTX_OK;
2965    }
2966    if (result) {
2967        *result = r;
2968    }
2969    if (release_lock) {
2970        qemu_mutex_unlock_iothread();
2971    }
2972    rcu_read_unlock();
2973    return val;
2974}
2975
2976uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2977                           MemTxAttrs attrs, MemTxResult *result)
2978{
2979    return address_space_ldq_internal(as, addr, attrs, result,
2980                                      DEVICE_NATIVE_ENDIAN);
2981}
2982
2983uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2984                           MemTxAttrs attrs, MemTxResult *result)
2985{
2986    return address_space_ldq_internal(as, addr, attrs, result,
2987                                      DEVICE_LITTLE_ENDIAN);
2988}
2989
2990uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2991                           MemTxAttrs attrs, MemTxResult *result)
2992{
2993    return address_space_ldq_internal(as, addr, attrs, result,
2994                                      DEVICE_BIG_ENDIAN);
2995}
2996
2997uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2998{
2999    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3000}
3001
3002uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3003{
3004    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3005}
3006
3007uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3008{
3009    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3010}
3011
3012/* XXX: optimize */
3013uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3014                            MemTxAttrs attrs, MemTxResult *result)
3015{
3016    uint8_t val;
3017    MemTxResult r;
3018
3019    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3020    if (result) {
3021        *result = r;
3022    }
3023    return val;
3024}
3025
3026uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3027{
3028    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3029}
3030
3031/* warning: addr must be aligned */
3032static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3033                                                   hwaddr addr,
3034                                                   MemTxAttrs attrs,
3035                                                   MemTxResult *result,
3036                                                   enum device_endian endian)
3037{
3038    uint8_t *ptr;
3039    uint64_t val;
3040    MemoryRegion *mr;
3041    hwaddr l = 2;
3042    hwaddr addr1;
3043    MemTxResult r;
3044    bool release_lock = false;
3045
3046    rcu_read_lock();
3047    mr = address_space_translate(as, addr, &addr1, &l,
3048                                 false);
3049    if (l < 2 || !memory_access_is_direct(mr, false)) {
3050        release_lock |= prepare_mmio_access(mr);
3051
3052        /* I/O case */
3053        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3054#if defined(TARGET_WORDS_BIGENDIAN)
3055        if (endian == DEVICE_LITTLE_ENDIAN) {
3056            val = bswap16(val);
3057        }
3058#else
3059        if (endian == DEVICE_BIG_ENDIAN) {
3060            val = bswap16(val);
3061        }
3062#endif
3063    } else {
3064        /* RAM case */
3065        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3066                                & TARGET_PAGE_MASK)
3067                               + addr1);
3068        switch (endian) {
3069        case DEVICE_LITTLE_ENDIAN:
3070            val = lduw_le_p(ptr);
3071            break;
3072        case DEVICE_BIG_ENDIAN:
3073            val = lduw_be_p(ptr);
3074            break;
3075        default:
3076            val = lduw_p(ptr);
3077            break;
3078        }
3079        r = MEMTX_OK;
3080    }
3081    if (result) {
3082        *result = r;
3083    }
3084    if (release_lock) {
3085        qemu_mutex_unlock_iothread();
3086    }
3087    rcu_read_unlock();
3088    return val;
3089}
3090
3091uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3092                           MemTxAttrs attrs, MemTxResult *result)
3093{
3094    return address_space_lduw_internal(as, addr, attrs, result,
3095                                       DEVICE_NATIVE_ENDIAN);
3096}
3097
3098uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3099                           MemTxAttrs attrs, MemTxResult *result)
3100{
3101    return address_space_lduw_internal(as, addr, attrs, result,
3102                                       DEVICE_LITTLE_ENDIAN);
3103}
3104
3105uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3106                           MemTxAttrs attrs, MemTxResult *result)
3107{
3108    return address_space_lduw_internal(as, addr, attrs, result,
3109                                       DEVICE_BIG_ENDIAN);
3110}
3111
3112uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3113{
3114    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3115}
3116
3117uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3118{
3119    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3120}
3121
3122uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3123{
3124    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3125}
3126
3127/* warning: addr must be aligned. The ram page is not marked as dirty
3128   and the code inside is not invalidated. It is useful if the dirty
3129   bits are used to track modified PTEs */
3130void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3131                                MemTxAttrs attrs, MemTxResult *result)
3132{
3133    uint8_t *ptr;
3134    MemoryRegion *mr;
3135    hwaddr l = 4;
3136    hwaddr addr1;
3137    MemTxResult r;
3138    uint8_t dirty_log_mask;
3139    bool release_lock = false;
3140
3141    rcu_read_lock();
3142    mr = address_space_translate(as, addr, &addr1, &l,
3143                                 true);
3144    if (l < 4 || !memory_access_is_direct(mr, true)) {
3145        release_lock |= prepare_mmio_access(mr);
3146
3147        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3148    } else {
3149        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3150        ptr = qemu_get_ram_ptr(addr1);
3151        stl_p(ptr, val);
3152
3153        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3154        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3155        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3156        r = MEMTX_OK;
3157    }
3158    if (result) {
3159        *result = r;
3160    }
3161    if (release_lock) {
3162        qemu_mutex_unlock_iothread();
3163    }
3164    rcu_read_unlock();
3165}
3166
3167void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3168{
3169    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3170}
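
/*
 * Illustrative sketch of the intended use described above: a target's page
 * table walker setting an accessed/dirty flag in a guest PTE without
 * marking the RAM page dirty for code-invalidation purposes.  The PTE
 * layout and the example_* name are hypothetical.
 *
 *     static void example_set_pte_flag(AddressSpace *as, hwaddr pte_addr,
 *                                      uint32_t flag)
 *     {
 *         uint32_t pte = ldl_phys(as, pte_addr);
 *
 *         if (!(pte & flag)) {
 *             stl_phys_notdirty(as, pte_addr, pte | flag);
 *         }
 *     }
 */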
3171
3172/* warning: addr must be aligned */
3173static inline void address_space_stl_internal(AddressSpace *as,
3174                                              hwaddr addr, uint32_t val,
3175                                              MemTxAttrs attrs,
3176                                              MemTxResult *result,
3177                                              enum device_endian endian)
3178{
3179    uint8_t *ptr;
3180    MemoryRegion *mr;
3181    hwaddr l = 4;
3182    hwaddr addr1;
3183    MemTxResult r;
3184    bool release_lock = false;
3185
3186    rcu_read_lock();
3187    mr = address_space_translate(as, addr, &addr1, &l,
3188                                 true);
3189    if (l < 4 || !memory_access_is_direct(mr, true)) {
3190        release_lock |= prepare_mmio_access(mr);
3191
3192#if defined(TARGET_WORDS_BIGENDIAN)
3193        if (endian == DEVICE_LITTLE_ENDIAN) {
3194            val = bswap32(val);
3195        }
3196#else
3197        if (endian == DEVICE_BIG_ENDIAN) {
3198            val = bswap32(val);
3199        }
3200#endif
3201        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3202    } else {
3203        /* RAM case */
3204        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3205        ptr = qemu_get_ram_ptr(addr1);
3206        switch (endian) {
3207        case DEVICE_LITTLE_ENDIAN:
3208            stl_le_p(ptr, val);
3209            break;
3210        case DEVICE_BIG_ENDIAN:
3211            stl_be_p(ptr, val);
3212            break;
3213        default:
3214            stl_p(ptr, val);
3215            break;
3216        }
3217        invalidate_and_set_dirty(mr, addr1, 4);
3218        r = MEMTX_OK;
3219    }
3220    if (result) {
3221        *result = r;
3222    }
3223    if (release_lock) {
3224        qemu_mutex_unlock_iothread();
3225    }
3226    rcu_read_unlock();
3227}
3228
3229void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3230                       MemTxAttrs attrs, MemTxResult *result)
3231{
3232    address_space_stl_internal(as, addr, val, attrs, result,
3233                               DEVICE_NATIVE_ENDIAN);
3234}
3235
3236void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3237                       MemTxAttrs attrs, MemTxResult *result)
3238{
3239    address_space_stl_internal(as, addr, val, attrs, result,
3240                               DEVICE_LITTLE_ENDIAN);
3241}
3242
3243void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3244                       MemTxAttrs attrs, MemTxResult *result)
3245{
3246    address_space_stl_internal(as, addr, val, attrs, result,
3247                               DEVICE_BIG_ENDIAN);
3248}
3249
3250void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3251{
3252    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3253}
3254
3255void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3256{
3257    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3258}
3259
3260void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3261{
3262    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3263}
3264
3265/* XXX: optimize */
3266void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3267                       MemTxAttrs attrs, MemTxResult *result)
3268{
3269    uint8_t v = val;
3270    MemTxResult r;
3271
3272    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3273    if (result) {
3274        *result = r;
3275    }
3276}
3277
3278void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3279{
3280    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3281}
3282
3283/* warning: addr must be aligned */
3284static inline void address_space_stw_internal(AddressSpace *as,
3285                                              hwaddr addr, uint32_t val,
3286                                              MemTxAttrs attrs,
3287                                              MemTxResult *result,
3288                                              enum device_endian endian)
3289{
3290    uint8_t *ptr;
3291    MemoryRegion *mr;
3292    hwaddr l = 2;
3293    hwaddr addr1;
3294    MemTxResult r;
3295    bool release_lock = false;
3296
3297    rcu_read_lock();
3298    mr = address_space_translate(as, addr, &addr1, &l, true);
3299    if (l < 2 || !memory_access_is_direct(mr, true)) {
3300        release_lock |= prepare_mmio_access(mr);
3301
3302#if defined(TARGET_WORDS_BIGENDIAN)
3303        if (endian == DEVICE_LITTLE_ENDIAN) {
3304            val = bswap16(val);
3305        }
3306#else
3307        if (endian == DEVICE_BIG_ENDIAN) {
3308            val = bswap16(val);
3309        }
3310#endif
3311        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3312    } else {
3313        /* RAM case */
3314        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3315        ptr = qemu_get_ram_ptr(addr1);
3316        switch (endian) {
3317        case DEVICE_LITTLE_ENDIAN:
3318            stw_le_p(ptr, val);
3319            break;
3320        case DEVICE_BIG_ENDIAN:
3321            stw_be_p(ptr, val);
3322            break;
3323        default:
3324            stw_p(ptr, val);
3325            break;
3326        }
3327        invalidate_and_set_dirty(mr, addr1, 2);
3328        r = MEMTX_OK;
3329    }
3330    if (result) {
3331        *result = r;
3332    }
3333    if (release_lock) {
3334        qemu_mutex_unlock_iothread();
3335    }
3336    rcu_read_unlock();
3337}
3338
3339void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3340                       MemTxAttrs attrs, MemTxResult *result)
3341{
3342    address_space_stw_internal(as, addr, val, attrs, result,
3343                               DEVICE_NATIVE_ENDIAN);
3344}
3345
3346void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3347                       MemTxAttrs attrs, MemTxResult *result)
3348{
3349    address_space_stw_internal(as, addr, val, attrs, result,
3350                               DEVICE_LITTLE_ENDIAN);
3351}
3352
3353void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3354                       MemTxAttrs attrs, MemTxResult *result)
3355{
3356    address_space_stw_internal(as, addr, val, attrs, result,
3357                               DEVICE_BIG_ENDIAN);
3358}
3359
3360void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3361{
3362    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3363}
3364
3365void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3366{
3367    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3368}
3369
3370void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3371{
3372    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3373}
3374
3375/* XXX: optimize */
3376void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3377                       MemTxAttrs attrs, MemTxResult *result)
3378{
3379    MemTxResult r;
3380    val = tswap64(val);
3381    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3382    if (result) {
3383        *result = r;
3384    }
3385}
3386
3387void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3388                       MemTxAttrs attrs, MemTxResult *result)
3389{
3390    MemTxResult r;
3391    val = cpu_to_le64(val);
3392    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3393    if (result) {
3394        *result = r;
3395    }
3396}

3397void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3398                       MemTxAttrs attrs, MemTxResult *result)
3399{
3400    MemTxResult r;
3401    val = cpu_to_be64(val);
3402    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3403    if (result) {
3404        *result = r;
3405    }
3406}
3407
3408void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3409{
3410    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3411}
3412
3413void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3414{
3415    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3416}
3417
3418void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3419{
3420    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3421}
3422
3423/* virtual memory access for debug (includes writing to ROM) */
3424int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3425                        uint8_t *buf, int len, int is_write)
3426{
3427    int l;
3428    hwaddr phys_addr;
3429    target_ulong page;
3430
3431    while (len > 0) {
3432        page = addr & TARGET_PAGE_MASK;
3433        phys_addr = cpu_get_phys_page_debug(cpu, page);
3434        /* if no physical page mapped, return an error */
3435        if (phys_addr == -1)
3436            return -1;
3437        l = (page + TARGET_PAGE_SIZE) - addr;
3438        if (l > len)
3439            l = len;
3440        phys_addr += (addr & ~TARGET_PAGE_MASK);
3441        if (is_write) {
3442            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3443        } else {
3444            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3445                             buf, l, 0);
3446        }
3447        len -= l;
3448        buf += l;
3449        addr += l;
3450    }
3451    return 0;
3452}
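
/*
 * Illustrative sketch: a debugger-style helper reading guest-virtual memory
 * through the CPU's current page tables, in the way the gdbstub and monitor
 * do.  The example_* name is hypothetical.
 *
 *     static bool example_read_guest_virt(CPUState *cpu, target_ulong va,
 *                                         uint8_t *buf, int len)
 *     {
 *         return cpu_memory_rw_debug(cpu, va, buf, len, 0) == 0;
 *     }
 */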
3453#endif
3454
3455/*
3456 * A helper function for the _utterly broken_ virtio device model to find out if
3457 * it's running on a big endian machine. Don't do this at home kids!
3458 */
3459bool target_words_bigendian(void);
3460bool target_words_bigendian(void)
3461{
3462#if defined(TARGET_WORDS_BIGENDIAN)
3463    return true;
3464#else
3465    return false;
3466#endif
3467}
3468
3469#ifndef CONFIG_USER_ONLY
3470bool cpu_physical_memory_is_io(hwaddr phys_addr)
3471{
3472    MemoryRegion *mr;
3473    hwaddr l = 1;
3474    bool res;
3475
3476    rcu_read_lock();
3477    mr = address_space_translate(&address_space_memory,
3478                                 phys_addr, &phys_addr, &l, false);
3479
3480    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3481    rcu_read_unlock();
3482    return res;
3483}
3484
3485int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3486{
3487    RAMBlock *block;
3488    int ret = 0;
3489
3490    rcu_read_lock();
3491    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3492        ret = func(block->idstr, block->host, block->offset,
3493                   block->used_length, opaque);
3494        if (ret) {
3495            break;
3496        }
3497    }
3498    rcu_read_unlock();
3499    return ret;
3500}
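
/*
 * Illustrative sketch of a qemu_ram_foreach_block() callback, e.g. summing
 * the used size of every RAM block; returning 0 keeps the iteration going.
 * The example_* names are hypothetical.
 *
 *     static int example_sum_block(const char *idstr, void *host,
 *                                  ram_addr_t offset, ram_addr_t length,
 *                                  void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;
 *         return 0;
 *     }
 *
 *     static uint64_t example_total_ram(void)
 *     {
 *         uint64_t total = 0;
 *
 *         qemu_ram_foreach_block(example_sum_block, &total);
 *         return total;
 *     }
 */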
3501#endif
3502