qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "qapi/error.h"
  21#ifndef _WIN32
  22#include <sys/mman.h>
  23#endif
  24
  25#include "qemu/cutils.h"
  26#include "cpu.h"
  27#include "tcg.h"
  28#include "hw/hw.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#endif
  32#include "hw/qdev.h"
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "hw/xen/xen.h"
  36#include "qemu/timer.h"
  37#include "qemu/config-file.h"
  38#include "qemu/error-report.h"
  39#include "exec/memory.h"
  40#include "sysemu/dma.h"
  41#include "exec/address-spaces.h"
  42#if defined(CONFIG_USER_ONLY)
  43#include <qemu.h>
  44#else /* !CONFIG_USER_ONLY */
  45#include "sysemu/xen-mapcache.h"
  46#include "trace.h"
  47#endif
  48#include "exec/cpu-all.h"
  49#include "qemu/rcu_queue.h"
  50#include "qemu/main-loop.h"
  51#include "translate-all.h"
  52#include "sysemu/replay.h"
  53
  54#include "exec/memory-internal.h"
  55#include "exec/ram_addr.h"
  56#include "exec/log.h"
  57
  58#include "qemu/range.h"
  59#ifndef _WIN32
  60#include "qemu/mmap-alloc.h"
  61#endif
  62
  63//#define DEBUG_SUBPAGE
  64
  65#if !defined(CONFIG_USER_ONLY)
  66/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  67 * are protected by the ramlist lock.
  68 */
  69RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  70
  71static MemoryRegion *system_memory;
  72static MemoryRegion *system_io;
  73
  74AddressSpace address_space_io;
  75AddressSpace address_space_memory;
  76
  77MemoryRegion io_mem_rom, io_mem_notdirty;
  78static MemoryRegion io_mem_unassigned;
  79
  80/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  81#define RAM_PREALLOC   (1 << 0)
  82
  83/* RAM is mmap-ed with MAP_SHARED */
  84#define RAM_SHARED     (1 << 1)
  85
   86/* Only a portion of RAM (used_length) is actually used and migrated.
   87 * This used_length can change across reboots.
  88 */
  89#define RAM_RESIZEABLE (1 << 2)
  90
  91#endif
  92
  93struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  94/* current CPU in the current thread. It is only valid inside
  95   cpu_exec() */
  96__thread CPUState *current_cpu;
  97/* 0 = Do not count executed instructions.
  98   1 = Precise instruction counting.
  99   2 = Adaptive rate instruction counting.  */
 100int use_icount;
 101
 102#if !defined(CONFIG_USER_ONLY)
 103
 104typedef struct PhysPageEntry PhysPageEntry;
 105
 106struct PhysPageEntry {
  107    /* How many bits to skip to the next level (in units of P_L2_BITS). 0 for a leaf. */
  108    uint32_t skip : 6;
  109    /* Index into phys_sections (!skip) or phys_map_nodes (skip). */
  110    uint32_t ptr : 26;
 111};
 112
 113#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 114
  115/* Size of the L2 (and L3, etc.) page tables.  */
 116#define ADDR_SPACE_BITS 64
 117
 118#define P_L2_BITS 9
 119#define P_L2_SIZE (1 << P_L2_BITS)
 120
 121#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
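    /* Worked example (assuming 4 KiB target pages, i.e. TARGET_PAGE_BITS == 12):
     * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 levels of 512-entry tables.
     */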
 122
 123typedef PhysPageEntry Node[P_L2_SIZE];
 124
 125typedef struct PhysPageMap {
 126    struct rcu_head rcu;
 127
 128    unsigned sections_nb;
 129    unsigned sections_nb_alloc;
 130    unsigned nodes_nb;
 131    unsigned nodes_nb_alloc;
 132    Node *nodes;
 133    MemoryRegionSection *sections;
 134} PhysPageMap;
 135
 136struct AddressSpaceDispatch {
 137    struct rcu_head rcu;
 138
 139    MemoryRegionSection *mru_section;
 140    /* This is a multi-level map on the physical address space.
 141     * The bottom level has pointers to MemoryRegionSections.
 142     */
 143    PhysPageEntry phys_map;
 144    PhysPageMap map;
 145    AddressSpace *as;
 146};
 147
 148#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 149typedef struct subpage_t {
 150    MemoryRegion iomem;
 151    AddressSpace *as;
 152    hwaddr base;
 153    uint16_t sub_section[TARGET_PAGE_SIZE];
 154} subpage_t;
 155
 156#define PHYS_SECTION_UNASSIGNED 0
 157#define PHYS_SECTION_NOTDIRTY 1
 158#define PHYS_SECTION_ROM 2
 159#define PHYS_SECTION_WATCH 3
 160
 161static void io_mem_init(void);
 162static void memory_map_init(void);
 163static void tcg_commit(MemoryListener *listener);
 164
 165static MemoryRegion io_mem_watch;
 166
 167/**
 168 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 169 * @cpu: the CPU whose AddressSpace this is
 170 * @as: the AddressSpace itself
 171 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 172 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 173 */
 174struct CPUAddressSpace {
 175    CPUState *cpu;
 176    AddressSpace *as;
 177    struct AddressSpaceDispatch *memory_dispatch;
 178    MemoryListener tcg_as_listener;
 179};
 180
 181#endif
 182
 183#if !defined(CONFIG_USER_ONLY)
 184
 185static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 186{
 187    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 188        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 189        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 190        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 191    }
 192}
 193
 194static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 195{
 196    unsigned i;
 197    uint32_t ret;
 198    PhysPageEntry e;
 199    PhysPageEntry *p;
 200
 201    ret = map->nodes_nb++;
 202    p = map->nodes[ret];
 203    assert(ret != PHYS_MAP_NODE_NIL);
 204    assert(ret != map->nodes_nb_alloc);
 205
 206    e.skip = leaf ? 0 : 1;
 207    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 208    for (i = 0; i < P_L2_SIZE; ++i) {
 209        memcpy(&p[i], &e, sizeof(e));
 210    }
 211    return ret;
 212}
 213
 214static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 215                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 216                                int level)
 217{
 218    PhysPageEntry *p;
 219    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 220
 221    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 222        lp->ptr = phys_map_node_alloc(map, level == 0);
 223    }
 224    p = map->nodes[lp->ptr];
 225    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 226
 227    while (*nb && lp < &p[P_L2_SIZE]) {
 228        if ((*index & (step - 1)) == 0 && *nb >= step) {
 229            lp->skip = 0;
 230            lp->ptr = leaf;
 231            *index += step;
 232            *nb -= step;
 233        } else {
 234            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 235        }
 236        ++lp;
 237    }
 238}
 239
 240static void phys_page_set(AddressSpaceDispatch *d,
 241                          hwaddr index, hwaddr nb,
 242                          uint16_t leaf)
 243{
 244    /* Wildly overreserve - it doesn't matter much. */
 245    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 246
 247    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 248}
 249
  250/* Compact a non-leaf page entry.  Simply detect that the entry has a single child,
 251 * and update our entry so we can skip it and go directly to the destination.
 252 */
 253static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 254{
 255    unsigned valid_ptr = P_L2_SIZE;
 256    int valid = 0;
 257    PhysPageEntry *p;
 258    int i;
 259
 260    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 261        return;
 262    }
 263
 264    p = nodes[lp->ptr];
 265    for (i = 0; i < P_L2_SIZE; i++) {
 266        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 267            continue;
 268        }
 269
 270        valid_ptr = i;
 271        valid++;
 272        if (p[i].skip) {
 273            phys_page_compact(&p[i], nodes, compacted);
 274        }
 275    }
 276
 277    /* We can only compress if there's only one child. */
 278    if (valid != 1) {
 279        return;
 280    }
 281
 282    assert(valid_ptr < P_L2_SIZE);
 283
 284    /* Don't compress if it won't fit in the # of bits we have. */
 285    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 286        return;
 287    }
 288
 289    lp->ptr = p[valid_ptr].ptr;
 290    if (!p[valid_ptr].skip) {
 291        /* If our only child is a leaf, make this a leaf. */
 292        /* By design, we should have made this node a leaf to begin with so we
 293         * should never reach here.
 294         * But since it's so simple to handle this, let's do it just in case we
 295         * change this rule.
 296         */
 297        lp->skip = 0;
 298    } else {
 299        lp->skip += p[valid_ptr].skip;
 300    }
 301}
 302
 303static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 304{
 305    DECLARE_BITMAP(compacted, nodes_nb);
 306
 307    if (d->phys_map.skip) {
 308        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 309    }
 310}
 311
 312static inline bool section_covers_addr(const MemoryRegionSection *section,
 313                                       hwaddr addr)
 314{
 315    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 316     * the section must cover the entire address space.
 317     */
 318    return section->size.hi ||
 319           range_covers_byte(section->offset_within_address_space,
 320                             section->size.lo, addr);
 321}
 322
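    /* Example (assuming 4 KiB target pages): with no compaction, every skip is 1
     * and the walk below consumes P_L2_BITS == 9 bits of the page index per
     * level, from bits [53:45] at the top of the 6-level table down to [8:0].
     */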
 323static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 324                                           Node *nodes, MemoryRegionSection *sections)
 325{
 326    PhysPageEntry *p;
 327    hwaddr index = addr >> TARGET_PAGE_BITS;
 328    int i;
 329
 330    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 331        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 332            return &sections[PHYS_SECTION_UNASSIGNED];
 333        }
 334        p = nodes[lp.ptr];
 335        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 336    }
 337
 338    if (section_covers_addr(&sections[lp.ptr], addr)) {
 339        return &sections[lp.ptr];
 340    } else {
 341        return &sections[PHYS_SECTION_UNASSIGNED];
 342    }
 343}
 344
 345bool memory_region_is_unassigned(MemoryRegion *mr)
 346{
 347    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 348        && mr != &io_mem_watch;
 349}
 350
 351/* Called from RCU critical section */
 352static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 353                                                        hwaddr addr,
 354                                                        bool resolve_subpage)
 355{
 356    MemoryRegionSection *section = atomic_read(&d->mru_section);
 357    subpage_t *subpage;
 358    bool update;
 359
 360    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 361        section_covers_addr(section, addr)) {
 362        update = false;
 363    } else {
 364        section = phys_page_find(d->phys_map, addr, d->map.nodes,
 365                                 d->map.sections);
 366        update = true;
 367    }
 368    if (resolve_subpage && section->mr->subpage) {
 369        subpage = container_of(section->mr, subpage_t, iomem);
 370        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 371    }
 372    if (update) {
 373        atomic_set(&d->mru_section, section);
 374    }
 375    return section;
 376}
 377
 378/* Called from RCU critical section */
 379static MemoryRegionSection *
 380address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 381                                 hwaddr *plen, bool resolve_subpage)
 382{
 383    MemoryRegionSection *section;
 384    MemoryRegion *mr;
 385    Int128 diff;
 386
 387    section = address_space_lookup_region(d, addr, resolve_subpage);
 388    /* Compute offset within MemoryRegionSection */
 389    addr -= section->offset_within_address_space;
 390
 391    /* Compute offset within MemoryRegion */
 392    *xlat = addr + section->offset_within_region;
 393
 394    mr = section->mr;
 395
 396    /* MMIO registers can be expected to perform full-width accesses based only
 397     * on their address, without considering adjacent registers that could
 398     * decode to completely different MemoryRegions.  When such registers
 399     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 400     * regions overlap wildly.  For this reason we cannot clamp the accesses
 401     * here.
 402     *
 403     * If the length is small (as is the case for address_space_ldl/stl),
 404     * everything works fine.  If the incoming length is large, however,
 405     * the caller really has to do the clamping through memory_access_size.
 406     */
 407    if (memory_region_is_ram(mr)) {
 408        diff = int128_sub(section->size, int128_make64(addr));
 409        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 410    }
 411    return section;
 412}
 413
 414/* Called from RCU critical section */
 415MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 416                                      hwaddr *xlat, hwaddr *plen,
 417                                      bool is_write)
 418{
 419    IOMMUTLBEntry iotlb;
 420    MemoryRegionSection *section;
 421    MemoryRegion *mr;
 422
 423    for (;;) {
 424        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 425        section = address_space_translate_internal(d, addr, &addr, plen, true);
 426        mr = section->mr;
 427
 428        if (!mr->iommu_ops) {
 429            break;
 430        }
 431
 432        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 433        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 434                | (addr & iotlb.addr_mask));
 435        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 436        if (!(iotlb.perm & (1 << is_write))) {
 437            mr = &io_mem_unassigned;
 438            break;
 439        }
 440
 441        as = iotlb.target_as;
 442    }
 443
 444    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 445        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 446        *plen = MIN(page, *plen);
 447    }
 448
 449    *xlat = addr;
 450    return mr;
 451}
 452
 453/* Called from RCU critical section */
 454MemoryRegionSection *
 455address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 456                                  hwaddr *xlat, hwaddr *plen)
 457{
 458    MemoryRegionSection *section;
 459    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
 460
 461    section = address_space_translate_internal(d, addr, xlat, plen, false);
 462
 463    assert(!section->mr->iommu_ops);
 464    return section;
 465}
 466#endif
 467
 468#if !defined(CONFIG_USER_ONLY)
 469
 470static int cpu_common_post_load(void *opaque, int version_id)
 471{
 472    CPUState *cpu = opaque;
 473
 474    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 475       version_id is increased. */
 476    cpu->interrupt_request &= ~0x01;
 477    tlb_flush(cpu, 1);
 478
 479    return 0;
 480}
 481
 482static int cpu_common_pre_load(void *opaque)
 483{
 484    CPUState *cpu = opaque;
 485
 486    cpu->exception_index = -1;
 487
 488    return 0;
 489}
 490
 491static bool cpu_common_exception_index_needed(void *opaque)
 492{
 493    CPUState *cpu = opaque;
 494
 495    return tcg_enabled() && cpu->exception_index != -1;
 496}
 497
 498static const VMStateDescription vmstate_cpu_common_exception_index = {
 499    .name = "cpu_common/exception_index",
 500    .version_id = 1,
 501    .minimum_version_id = 1,
 502    .needed = cpu_common_exception_index_needed,
 503    .fields = (VMStateField[]) {
 504        VMSTATE_INT32(exception_index, CPUState),
 505        VMSTATE_END_OF_LIST()
 506    }
 507};
 508
 509static bool cpu_common_crash_occurred_needed(void *opaque)
 510{
 511    CPUState *cpu = opaque;
 512
 513    return cpu->crash_occurred;
 514}
 515
 516static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 517    .name = "cpu_common/crash_occurred",
 518    .version_id = 1,
 519    .minimum_version_id = 1,
 520    .needed = cpu_common_crash_occurred_needed,
 521    .fields = (VMStateField[]) {
 522        VMSTATE_BOOL(crash_occurred, CPUState),
 523        VMSTATE_END_OF_LIST()
 524    }
 525};
 526
 527const VMStateDescription vmstate_cpu_common = {
 528    .name = "cpu_common",
 529    .version_id = 1,
 530    .minimum_version_id = 1,
 531    .pre_load = cpu_common_pre_load,
 532    .post_load = cpu_common_post_load,
 533    .fields = (VMStateField[]) {
 534        VMSTATE_UINT32(halted, CPUState),
 535        VMSTATE_UINT32(interrupt_request, CPUState),
 536        VMSTATE_END_OF_LIST()
 537    },
 538    .subsections = (const VMStateDescription*[]) {
 539        &vmstate_cpu_common_exception_index,
 540        &vmstate_cpu_common_crash_occurred,
 541        NULL
 542    }
 543};
 544
 545#endif
 546
 547CPUState *qemu_get_cpu(int index)
 548{
 549    CPUState *cpu;
 550
 551    CPU_FOREACH(cpu) {
 552        if (cpu->cpu_index == index) {
 553            return cpu;
 554        }
 555    }
 556
 557    return NULL;
 558}
 559
 560#if !defined(CONFIG_USER_ONLY)
 561void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 562{
 563    CPUAddressSpace *newas;
 564
 565    /* Target code should have set num_ases before calling us */
 566    assert(asidx < cpu->num_ases);
 567
 568    if (asidx == 0) {
 569        /* address space 0 gets the convenience alias */
 570        cpu->as = as;
 571    }
 572
 573    /* KVM cannot currently support multiple address spaces. */
 574    assert(asidx == 0 || !kvm_enabled());
 575
 576    if (!cpu->cpu_ases) {
 577        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 578    }
 579
 580    newas = &cpu->cpu_ases[asidx];
 581    newas->cpu = cpu;
 582    newas->as = as;
 583    if (tcg_enabled()) {
 584        newas->tcg_as_listener.commit = tcg_commit;
 585        memory_listener_register(&newas->tcg_as_listener, as);
 586    }
 587}
 588
 589AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 590{
 591    /* Return the AddressSpace corresponding to the specified index */
 592    return cpu->cpu_ases[asidx].as;
 593}
 594#endif
 595
 596#ifndef CONFIG_USER_ONLY
 597static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 598
 599static int cpu_get_free_index(Error **errp)
 600{
 601    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 602
 603    if (cpu >= MAX_CPUMASK_BITS) {
 604        error_setg(errp, "Trying to use more CPUs than max of %d",
 605                   MAX_CPUMASK_BITS);
 606        return -1;
 607    }
 608
 609    bitmap_set(cpu_index_map, cpu, 1);
 610    return cpu;
 611}
 612
 613void cpu_exec_exit(CPUState *cpu)
 614{
 615    if (cpu->cpu_index == -1) {
 616        /* cpu_index was never allocated by this @cpu or was already freed. */
 617        return;
 618    }
 619
 620    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 621    cpu->cpu_index = -1;
 622}
 623#else
 624
 625static int cpu_get_free_index(Error **errp)
 626{
 627    CPUState *some_cpu;
 628    int cpu_index = 0;
 629
 630    CPU_FOREACH(some_cpu) {
 631        cpu_index++;
 632    }
 633    return cpu_index;
 634}
 635
 636void cpu_exec_exit(CPUState *cpu)
 637{
 638}
 639#endif
 640
 641void cpu_exec_init(CPUState *cpu, Error **errp)
 642{
 643    CPUClass *cc = CPU_GET_CLASS(cpu);
 644    int cpu_index;
 645    Error *local_err = NULL;
 646
 647    cpu->as = NULL;
 648    cpu->num_ases = 0;
 649
 650#ifndef CONFIG_USER_ONLY
 651    cpu->thread_id = qemu_get_thread_id();
 652
 653    /* This is a softmmu CPU object, so create a property for it
 654     * so users can wire up its memory. (This can't go in qom/cpu.c
 655     * because that file is compiled only once for both user-mode
 656     * and system builds.) The default if no link is set up is to use
 657     * the system address space.
 658     */
 659    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 660                             (Object **)&cpu->memory,
 661                             qdev_prop_allow_set_link_before_realize,
 662                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
 663                             &error_abort);
 664    cpu->memory = system_memory;
 665    object_ref(OBJECT(cpu->memory));
 666#endif
 667
 668#if defined(CONFIG_USER_ONLY)
 669    cpu_list_lock();
 670#endif
 671    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
 672    if (local_err) {
 673        error_propagate(errp, local_err);
 674#if defined(CONFIG_USER_ONLY)
 675        cpu_list_unlock();
 676#endif
 677        return;
 678    }
 679    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 680#if defined(CONFIG_USER_ONLY)
 681    cpu_list_unlock();
 682#endif
 683    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 684        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 685    }
 686    if (cc->vmsd != NULL) {
 687        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 688    }
 689}
 690
 691#if defined(CONFIG_USER_ONLY)
 692static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 693{
 694    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 695}
 696#else
 697static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 698{
 699    MemTxAttrs attrs;
 700    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
 701    int asidx = cpu_asidx_from_attrs(cpu, attrs);
 702    if (phys != -1) {
 703        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
 704                                phys | (pc & ~TARGET_PAGE_MASK));
 705    }
 706}
 707#endif
 708
 709#if defined(CONFIG_USER_ONLY)
 710void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 711
 712{
 713}
 714
 715int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 716                          int flags)
 717{
 718    return -ENOSYS;
 719}
 720
 721void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 722{
 723}
 724
 725int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 726                          int flags, CPUWatchpoint **watchpoint)
 727{
 728    return -ENOSYS;
 729}
 730#else
 731/* Add a watchpoint.  */
 732int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 733                          int flags, CPUWatchpoint **watchpoint)
 734{
 735    CPUWatchpoint *wp;
 736
 737    /* forbid ranges which are empty or run off the end of the address space */
 738    if (len == 0 || (addr + len - 1) < addr) {
 739        error_report("tried to set invalid watchpoint at %"
 740                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 741        return -EINVAL;
 742    }
 743    wp = g_malloc(sizeof(*wp));
 744
 745    wp->vaddr = addr;
 746    wp->len = len;
 747    wp->flags = flags;
 748
 749    /* keep all GDB-injected watchpoints in front */
 750    if (flags & BP_GDB) {
 751        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 752    } else {
 753        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 754    }
 755
 756    tlb_flush_page(cpu, addr);
 757
 758    if (watchpoint)
 759        *watchpoint = wp;
 760    return 0;
 761}
 762
 763/* Remove a specific watchpoint.  */
 764int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 765                          int flags)
 766{
 767    CPUWatchpoint *wp;
 768
 769    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 770        if (addr == wp->vaddr && len == wp->len
 771                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 772            cpu_watchpoint_remove_by_ref(cpu, wp);
 773            return 0;
 774        }
 775    }
 776    return -ENOENT;
 777}
 778
 779/* Remove a specific watchpoint by reference.  */
 780void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 781{
 782    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 783
 784    tlb_flush_page(cpu, watchpoint->vaddr);
 785
 786    g_free(watchpoint);
 787}
 788
 789/* Remove all matching watchpoints.  */
 790void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 791{
 792    CPUWatchpoint *wp, *next;
 793
 794    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 795        if (wp->flags & mask) {
 796            cpu_watchpoint_remove_by_ref(cpu, wp);
 797        }
 798    }
 799}
 800
 801/* Return true if this watchpoint address matches the specified
  802 * access (i.e. the address range covered by the watchpoint overlaps
 803 * partially or completely with the address range covered by the
 804 * access).
 805 */
 806static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 807                                                  vaddr addr,
 808                                                  vaddr len)
 809{
 810    /* We know the lengths are non-zero, but a little caution is
 811     * required to avoid errors in the case where the range ends
 812     * exactly at the top of the address space and so addr + len
 813     * wraps round to zero.
 814     */
 815    vaddr wpend = wp->vaddr + wp->len - 1;
 816    vaddr addrend = addr + len - 1;
 817
 818    return !(addr > wpend || wp->vaddr > addrend);
 819}
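    /* Example: an 8-byte watchpoint at 0x1000 covers [0x1000, 0x1007]; a 4-byte
     * access at 0x1004 has addrend == 0x1007, so neither range starts past the
     * other's end and the access is reported as a match.
     */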
 820
 821#endif
 822
 823/* Add a breakpoint.  */
 824int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 825                          CPUBreakpoint **breakpoint)
 826{
 827    CPUBreakpoint *bp;
 828
 829    bp = g_malloc(sizeof(*bp));
 830
 831    bp->pc = pc;
 832    bp->flags = flags;
 833
 834    /* keep all GDB-injected breakpoints in front */
 835    if (flags & BP_GDB) {
 836        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 837    } else {
 838        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 839    }
 840
 841    breakpoint_invalidate(cpu, pc);
 842
 843    if (breakpoint) {
 844        *breakpoint = bp;
 845    }
 846    return 0;
 847}
 848
 849/* Remove a specific breakpoint.  */
 850int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 851{
 852    CPUBreakpoint *bp;
 853
 854    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 855        if (bp->pc == pc && bp->flags == flags) {
 856            cpu_breakpoint_remove_by_ref(cpu, bp);
 857            return 0;
 858        }
 859    }
 860    return -ENOENT;
 861}
 862
 863/* Remove a specific breakpoint by reference.  */
 864void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 865{
 866    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 867
 868    breakpoint_invalidate(cpu, breakpoint->pc);
 869
 870    g_free(breakpoint);
 871}
 872
 873/* Remove all matching breakpoints. */
 874void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 875{
 876    CPUBreakpoint *bp, *next;
 877
 878    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 879        if (bp->flags & mask) {
 880            cpu_breakpoint_remove_by_ref(cpu, bp);
 881        }
 882    }
 883}
 884
  885/* Enable or disable single-step mode. EXCP_DEBUG is returned by the
  886   CPU loop after each instruction. */
 887void cpu_single_step(CPUState *cpu, int enabled)
 888{
 889    if (cpu->singlestep_enabled != enabled) {
 890        cpu->singlestep_enabled = enabled;
 891        if (kvm_enabled()) {
 892            kvm_update_guest_debug(cpu, 0);
 893        } else {
 894            /* must flush all the translated code to avoid inconsistencies */
 895            /* XXX: only flush what is necessary */
 896            tb_flush(cpu);
 897        }
 898    }
 899}
 900
 901void cpu_abort(CPUState *cpu, const char *fmt, ...)
 902{
 903    va_list ap;
 904    va_list ap2;
 905
 906    va_start(ap, fmt);
 907    va_copy(ap2, ap);
 908    fprintf(stderr, "qemu: fatal: ");
 909    vfprintf(stderr, fmt, ap);
 910    fprintf(stderr, "\n");
 911    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 912    if (qemu_log_separate()) {
 913        qemu_log("qemu: fatal: ");
 914        qemu_log_vprintf(fmt, ap2);
 915        qemu_log("\n");
 916        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 917        qemu_log_flush();
 918        qemu_log_close();
 919    }
 920    va_end(ap2);
 921    va_end(ap);
 922    replay_finish();
 923#if defined(CONFIG_USER_ONLY)
 924    {
 925        struct sigaction act;
 926        sigfillset(&act.sa_mask);
 927        act.sa_handler = SIG_DFL;
 928        sigaction(SIGABRT, &act, NULL);
 929    }
 930#endif
 931    abort();
 932}
 933
 934#if !defined(CONFIG_USER_ONLY)
 935/* Called from RCU critical section */
 936static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 937{
 938    RAMBlock *block;
 939
 940    block = atomic_rcu_read(&ram_list.mru_block);
 941    if (block && addr - block->offset < block->max_length) {
 942        return block;
 943    }
 944    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 945        if (addr - block->offset < block->max_length) {
 946            goto found;
 947        }
 948    }
 949
 950    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 951    abort();
 952
 953found:
 954    /* It is safe to write mru_block outside the iothread lock.  This
 955     * is what happens:
 956     *
 957     *     mru_block = xxx
 958     *     rcu_read_unlock()
 959     *                                        xxx removed from list
 960     *                  rcu_read_lock()
 961     *                  read mru_block
 962     *                                        mru_block = NULL;
 963     *                                        call_rcu(reclaim_ramblock, xxx);
 964     *                  rcu_read_unlock()
 965     *
 966     * atomic_rcu_set is not needed here.  The block was already published
 967     * when it was placed into the list.  Here we're just making an extra
 968     * copy of the pointer.
 969     */
 970    ram_list.mru_block = block;
 971    return block;
 972}
 973
 974static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 975{
 976    CPUState *cpu;
 977    ram_addr_t start1;
 978    RAMBlock *block;
 979    ram_addr_t end;
 980
 981    end = TARGET_PAGE_ALIGN(start + length);
 982    start &= TARGET_PAGE_MASK;
 983
 984    rcu_read_lock();
 985    block = qemu_get_ram_block(start);
 986    assert(block == qemu_get_ram_block(end - 1));
 987    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 988    CPU_FOREACH(cpu) {
 989        tlb_reset_dirty(cpu, start1, length);
 990    }
 991    rcu_read_unlock();
 992}
 993
 994/* Note: start and end must be within the same ram block.  */
 995bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 996                                              ram_addr_t length,
 997                                              unsigned client)
 998{
 999    DirtyMemoryBlocks *blocks;
1000    unsigned long end, page;
1001    bool dirty = false;
1002
1003    if (length == 0) {
1004        return false;
1005    }
1006
1007    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1008    page = start >> TARGET_PAGE_BITS;
1009
1010    rcu_read_lock();
1011
1012    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1013
1014    while (page < end) {
1015        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1016        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1017        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1018
1019        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1020                                              offset, num);
1021        page += num;
1022    }
1023
1024    rcu_read_unlock();
1025
1026    if (dirty && tcg_enabled()) {
1027        tlb_reset_dirty_range_all(start, length);
1028    }
1029
1030    return dirty;
1031}
1032
1033/* Called from RCU critical section */
1034hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1035                                       MemoryRegionSection *section,
1036                                       target_ulong vaddr,
1037                                       hwaddr paddr, hwaddr xlat,
1038                                       int prot,
1039                                       target_ulong *address)
1040{
1041    hwaddr iotlb;
1042    CPUWatchpoint *wp;
1043
1044    if (memory_region_is_ram(section->mr)) {
1045        /* Normal RAM.  */
1046        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1047            + xlat;
1048        if (!section->readonly) {
1049            iotlb |= PHYS_SECTION_NOTDIRTY;
1050        } else {
1051            iotlb |= PHYS_SECTION_ROM;
1052        }
1053    } else {
1054        AddressSpaceDispatch *d;
1055
1056        d = atomic_rcu_read(&section->address_space->dispatch);
1057        iotlb = section - d->map.sections;
1058        iotlb += xlat;
1059    }
1060
1061    /* Make accesses to pages with watchpoints go via the
1062       watchpoint trap routines.  */
1063    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065            /* Avoid trapping reads of pages with a write breakpoint. */
1066            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067                iotlb = PHYS_SECTION_WATCH + paddr;
1068                *address |= TLB_MMIO;
1069                break;
1070            }
1071        }
1072    }
1073
1074    return iotlb;
1075}
1076#endif /* defined(CONFIG_USER_ONLY) */
1077
1078#if !defined(CONFIG_USER_ONLY)
1079
1080static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081                             uint16_t section);
1082static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1083
1084static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085                               qemu_anon_ram_alloc;
1086
1087/*
1088 * Set a custom physical guest memory allocator.
1089 * Accelerators with unusual needs may need this.  Hopefully, we can
1090 * get rid of it eventually.
1091 */
1092void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1093{
1094    phys_mem_alloc = alloc;
1095}
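    /* Usage sketch (hypothetical names): an accelerator with special backing
     * requirements would install its allocator once, during early init and
     * before any RAM block is created:
     *
     *     phys_mem_set_alloc(my_special_ram_alloc);
     *
     * where my_special_ram_alloc(size, &align) returns the host mapping and may
     * update *align, mirroring the default qemu_anon_ram_alloc().
     */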
1096
1097static uint16_t phys_section_add(PhysPageMap *map,
1098                                 MemoryRegionSection *section)
1099{
1100    /* The physical section number is ORed with a page-aligned
1101     * pointer to produce the iotlb entries.  Thus it should
1102     * never overflow into the page-aligned value.
1103     */
1104    assert(map->sections_nb < TARGET_PAGE_SIZE);
1105
1106    if (map->sections_nb == map->sections_nb_alloc) {
1107        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108        map->sections = g_renew(MemoryRegionSection, map->sections,
1109                                map->sections_nb_alloc);
1110    }
1111    map->sections[map->sections_nb] = *section;
1112    memory_region_ref(section->mr);
1113    return map->sections_nb++;
1114}
1115
1116static void phys_section_destroy(MemoryRegion *mr)
1117{
1118    bool have_sub_page = mr->subpage;
1119
1120    memory_region_unref(mr);
1121
1122    if (have_sub_page) {
1123        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124        object_unref(OBJECT(&subpage->iomem));
1125        g_free(subpage);
1126    }
1127}
1128
1129static void phys_sections_free(PhysPageMap *map)
1130{
1131    while (map->sections_nb > 0) {
1132        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133        phys_section_destroy(section->mr);
1134    }
1135    g_free(map->sections);
1136    g_free(map->nodes);
1137}
1138
1139static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1140{
1141    subpage_t *subpage;
1142    hwaddr base = section->offset_within_address_space
1143        & TARGET_PAGE_MASK;
1144    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145                                                   d->map.nodes, d->map.sections);
1146    MemoryRegionSection subsection = {
1147        .offset_within_address_space = base,
1148        .size = int128_make64(TARGET_PAGE_SIZE),
1149    };
1150    hwaddr start, end;
1151
1152    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1153
1154    if (!(existing->mr->subpage)) {
1155        subpage = subpage_init(d->as, base);
1156        subsection.address_space = d->as;
1157        subsection.mr = &subpage->iomem;
1158        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159                      phys_section_add(&d->map, &subsection));
1160    } else {
1161        subpage = container_of(existing->mr, subpage_t, iomem);
1162    }
1163    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164    end = start + int128_get64(section->size) - 1;
1165    subpage_register(subpage, start, end,
1166                     phys_section_add(&d->map, section));
1167}
1168
1169
1170static void register_multipage(AddressSpaceDispatch *d,
1171                               MemoryRegionSection *section)
1172{
1173    hwaddr start_addr = section->offset_within_address_space;
1174    uint16_t section_index = phys_section_add(&d->map, section);
1175    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176                                                    TARGET_PAGE_BITS));
1177
1178    assert(num_pages);
1179    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1180}
1181
1182static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1183{
1184    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185    AddressSpaceDispatch *d = as->next_dispatch;
1186    MemoryRegionSection now = *section, remain = *section;
1187    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1188
1189    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191                       - now.offset_within_address_space;
1192
1193        now.size = int128_min(int128_make64(left), now.size);
1194        register_subpage(d, &now);
1195    } else {
1196        now.size = int128_zero();
1197    }
1198    while (int128_ne(remain.size, now.size)) {
1199        remain.size = int128_sub(remain.size, now.size);
1200        remain.offset_within_address_space += int128_get64(now.size);
1201        remain.offset_within_region += int128_get64(now.size);
1202        now = remain;
1203        if (int128_lt(remain.size, page_size)) {
1204            register_subpage(d, &now);
1205        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206            now.size = page_size;
1207            register_subpage(d, &now);
1208        } else {
1209            now.size = int128_and(now.size, int128_neg(page_size));
1210            register_multipage(d, &now);
1211        }
1212    }
1213}
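    /* Example (assuming 4 KiB target pages): a section covering [0x1800, 0x5400)
     * is registered as a subpage head [0x1800, 0x2000), a multipage middle
     * [0x2000, 0x5000) and a subpage tail [0x5000, 0x5400).
     */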
1214
1215void qemu_flush_coalesced_mmio_buffer(void)
1216{
1217    if (kvm_enabled())
1218        kvm_flush_coalesced_mmio_buffer();
1219}
1220
1221void qemu_mutex_lock_ramlist(void)
1222{
1223    qemu_mutex_lock(&ram_list.mutex);
1224}
1225
1226void qemu_mutex_unlock_ramlist(void)
1227{
1228    qemu_mutex_unlock(&ram_list.mutex);
1229}
1230
1231#ifdef __linux__
1232static void *file_ram_alloc(RAMBlock *block,
1233                            ram_addr_t memory,
1234                            const char *path,
1235                            Error **errp)
1236{
1237    bool unlink_on_error = false;
1238    char *filename;
1239    char *sanitized_name;
1240    char *c;
1241    void *area;
1242    int fd = -1;
1243    int64_t page_size;
1244
1245    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1246        error_setg(errp,
1247                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1248        return NULL;
1249    }
1250
1251    for (;;) {
1252        fd = open(path, O_RDWR);
1253        if (fd >= 0) {
1254            /* @path names an existing file, use it */
1255            break;
1256        }
1257        if (errno == ENOENT) {
1258            /* @path names a file that doesn't exist, create it */
1259            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1260            if (fd >= 0) {
1261                unlink_on_error = true;
1262                break;
1263            }
1264        } else if (errno == EISDIR) {
1265            /* @path names a directory, create a file there */
1266            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1267            sanitized_name = g_strdup(memory_region_name(block->mr));
1268            for (c = sanitized_name; *c != '\0'; c++) {
1269                if (*c == '/') {
1270                    *c = '_';
1271                }
1272            }
1273
1274            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1275                                       sanitized_name);
1276            g_free(sanitized_name);
1277
1278            fd = mkstemp(filename);
1279            if (fd >= 0) {
1280                unlink(filename);
1281                g_free(filename);
1282                break;
1283            }
1284            g_free(filename);
1285        }
1286        if (errno != EEXIST && errno != EINTR) {
1287            error_setg_errno(errp, errno,
1288                             "can't open backing store %s for guest RAM",
1289                             path);
1290            goto error;
1291        }
1292        /*
1293         * Try again on EINTR and EEXIST.  The latter happens when
1294         * something else creates the file between our two open().
1295         */
1296    }
1297
1298    page_size = qemu_fd_getpagesize(fd);
1299    block->mr->align = page_size;
1300
1301    if (memory < page_size) {
1302        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1303                   "or larger than page size 0x%" PRIx64,
1304                   memory, page_size);
1305        goto error;
1306    }
1307
1308    memory = ROUND_UP(memory, page_size);
1309
1310    /*
1311     * ftruncate is not supported by hugetlbfs in older
1312     * hosts, so don't bother bailing out on errors.
1313     * If anything goes wrong with it under other filesystems,
1314     * mmap will fail.
1315     */
1316    if (ftruncate(fd, memory)) {
1317        perror("ftruncate");
1318    }
1319
1320    area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
1321    if (area == MAP_FAILED) {
1322        error_setg_errno(errp, errno,
1323                         "unable to map backing store for guest RAM");
1324        goto error;
1325    }
1326
1327    if (mem_prealloc) {
1328        os_mem_prealloc(fd, area, memory);
1329    }
1330
1331    block->fd = fd;
1332    return area;
1333
1334error:
1335    if (unlink_on_error) {
1336        unlink(path);
1337    }
1338    if (fd != -1) {
1339        close(fd);
1340    }
1341    return NULL;
1342}
1343#endif
1344
1345/* Called with the ramlist lock held.  */
1346static ram_addr_t find_ram_offset(ram_addr_t size)
1347{
1348    RAMBlock *block, *next_block;
1349    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1350
1351    assert(size != 0); /* it would hand out the same offset multiple times */
1352
1353    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1354        return 0;
1355    }
1356
1357    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1358        ram_addr_t end, next = RAM_ADDR_MAX;
1359
1360        end = block->offset + block->max_length;
1361
1362        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1363            if (next_block->offset >= end) {
1364                next = MIN(next, next_block->offset);
1365            }
1366        }
1367        if (next - end >= size && next - end < mingap) {
1368            offset = end;
1369            mingap = next - end;
1370        }
1371    }
1372
1373    if (offset == RAM_ADDR_MAX) {
1374        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1375                (uint64_t)size);
1376        abort();
1377    }
1378
1379    return offset;
1380}
1381
1382ram_addr_t last_ram_offset(void)
1383{
1384    RAMBlock *block;
1385    ram_addr_t last = 0;
1386
1387    rcu_read_lock();
1388    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1389        last = MAX(last, block->offset + block->max_length);
1390    }
1391    rcu_read_unlock();
1392    return last;
1393}
1394
1395static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1396{
1397    int ret;
1398
1399    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1400    if (!machine_dump_guest_core(current_machine)) {
1401        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1402        if (ret) {
1403            perror("qemu_madvise");
1404            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1405                            "but dump_guest_core=off specified\n");
1406        }
1407    }
1408}
1409
1410/* Called within an RCU critical section, or while the ramlist lock
1411 * is held.
1412 */
1413static RAMBlock *find_ram_block(ram_addr_t addr)
1414{
1415    RAMBlock *block;
1416
1417    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1418        if (block->offset == addr) {
1419            return block;
1420        }
1421    }
1422
1423    return NULL;
1424}
1425
1426const char *qemu_ram_get_idstr(RAMBlock *rb)
1427{
1428    return rb->idstr;
1429}
1430
1431/* Called with iothread lock held.  */
1432void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1433{
1434    RAMBlock *new_block, *block;
1435
1436    rcu_read_lock();
1437    new_block = find_ram_block(addr);
1438    assert(new_block);
1439    assert(!new_block->idstr[0]);
1440
1441    if (dev) {
1442        char *id = qdev_get_dev_path(dev);
1443        if (id) {
1444            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1445            g_free(id);
1446        }
1447    }
1448    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1449
1450    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1451        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1452            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1453                    new_block->idstr);
1454            abort();
1455        }
1456    }
1457    rcu_read_unlock();
1458}
1459
1460/* Called with iothread lock held.  */
1461void qemu_ram_unset_idstr(ram_addr_t addr)
1462{
1463    RAMBlock *block;
1464
1465    /* FIXME: arch_init.c assumes that this is not called throughout
1466     * migration.  Ignore the problem since hot-unplug during migration
1467     * does not work anyway.
1468     */
1469
1470    rcu_read_lock();
1471    block = find_ram_block(addr);
1472    if (block) {
1473        memset(block->idstr, 0, sizeof(block->idstr));
1474    }
1475    rcu_read_unlock();
1476}
1477
1478static int memory_try_enable_merging(void *addr, size_t len)
1479{
1480    if (!machine_mem_merge(current_machine)) {
1481        /* disabled by the user */
1482        return 0;
1483    }
1484
1485    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1486}
1487
1488/* Only legal before the guest might have detected the memory size: e.g. on
1489 * incoming migration, or right after reset.
1490 *
1491 * As the memory core doesn't know how the memory is accessed, it is up to
1492 * the resize callback to update device state and/or add assertions to detect
1493 * misuse, if necessary.
1494 */
1495int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1496{
1497    RAMBlock *block = find_ram_block(base);
1498
1499    assert(block);
1500
1501    newsize = HOST_PAGE_ALIGN(newsize);
1502
1503    if (block->used_length == newsize) {
1504        return 0;
1505    }
1506
1507    if (!(block->flags & RAM_RESIZEABLE)) {
1508        error_setg_errno(errp, EINVAL,
1509                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1510                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1511                         newsize, block->used_length);
1512        return -EINVAL;
1513    }
1514
1515    if (block->max_length < newsize) {
1516        error_setg_errno(errp, EINVAL,
1517                         "Length too large: %s: 0x" RAM_ADDR_FMT
1518                         " > 0x" RAM_ADDR_FMT, block->idstr,
1519                         newsize, block->max_length);
1520        return -EINVAL;
1521    }
1522
1523    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1524    block->used_length = newsize;
1525    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1526                                        DIRTY_CLIENTS_ALL);
1527    memory_region_set_size(block->mr, newsize);
1528    if (block->resized) {
1529        block->resized(block->idstr, newsize, block->host);
1530    }
1531    return 0;
1532}
1533
1534/* Called with ram_list.mutex held */
1535static void dirty_memory_extend(ram_addr_t old_ram_size,
1536                                ram_addr_t new_ram_size)
1537{
1538    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1539                                             DIRTY_MEMORY_BLOCK_SIZE);
1540    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1541                                             DIRTY_MEMORY_BLOCK_SIZE);
1542    int i;
1543
1544    /* Only need to extend if block count increased */
1545    if (new_num_blocks <= old_num_blocks) {
1546        return;
1547    }
1548
1549    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1550        DirtyMemoryBlocks *old_blocks;
1551        DirtyMemoryBlocks *new_blocks;
1552        int j;
1553
1554        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1555        new_blocks = g_malloc(sizeof(*new_blocks) +
1556                              sizeof(new_blocks->blocks[0]) * new_num_blocks);
1557
1558        if (old_num_blocks) {
1559            memcpy(new_blocks->blocks, old_blocks->blocks,
1560                   old_num_blocks * sizeof(old_blocks->blocks[0]));
1561        }
1562
1563        for (j = old_num_blocks; j < new_num_blocks; j++) {
1564            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1565        }
1566
1567        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1568
1569        if (old_blocks) {
1570            g_free_rcu(old_blocks, rcu);
1571        }
1572    }
1573}
1574
1575static void ram_block_add(RAMBlock *new_block, Error **errp)
1576{
1577    RAMBlock *block;
1578    RAMBlock *last_block = NULL;
1579    ram_addr_t old_ram_size, new_ram_size;
1580    Error *err = NULL;
1581
1582    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1583
1584    qemu_mutex_lock_ramlist();
1585    new_block->offset = find_ram_offset(new_block->max_length);
1586
1587    if (!new_block->host) {
1588        if (xen_enabled()) {
1589            xen_ram_alloc(new_block->offset, new_block->max_length,
1590                          new_block->mr, &err);
1591            if (err) {
1592                error_propagate(errp, err);
1593                qemu_mutex_unlock_ramlist();
1594                return;
1595            }
1596        } else {
1597            new_block->host = phys_mem_alloc(new_block->max_length,
1598                                             &new_block->mr->align);
1599            if (!new_block->host) {
1600                error_setg_errno(errp, errno,
1601                                 "cannot set up guest memory '%s'",
1602                                 memory_region_name(new_block->mr));
1603                qemu_mutex_unlock_ramlist();
1604                return;
1605            }
1606            memory_try_enable_merging(new_block->host, new_block->max_length);
1607        }
1608    }
1609
1610    new_ram_size = MAX(old_ram_size,
1611              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1612    if (new_ram_size > old_ram_size) {
1613        migration_bitmap_extend(old_ram_size, new_ram_size);
1614        dirty_memory_extend(old_ram_size, new_ram_size);
1615    }
1616    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1617     * QLIST (which has an RCU-friendly variant) does not have insertion at
1618     * tail, so save the last element in last_block.
1619     */
1620    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1621        last_block = block;
1622        if (block->max_length < new_block->max_length) {
1623            break;
1624        }
1625    }
1626    if (block) {
1627        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1628    } else if (last_block) {
1629        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1630    } else { /* list is empty */
1631        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1632    }
1633    ram_list.mru_block = NULL;
1634
1635    /* Write list before version */
1636    smp_wmb();
1637    ram_list.version++;
1638    qemu_mutex_unlock_ramlist();
1639
1640    cpu_physical_memory_set_dirty_range(new_block->offset,
1641                                        new_block->used_length,
1642                                        DIRTY_CLIENTS_ALL);
1643
1644    if (new_block->host) {
1645        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1646        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1647        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1648        if (kvm_enabled()) {
1649            kvm_setup_guest_memory(new_block->host, new_block->max_length);
1650        }
1651    }
1652}
1653
1654#ifdef __linux__
1655RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1656                                   bool share, const char *mem_path,
1657                                   Error **errp)
1658{
1659    RAMBlock *new_block;
1660    Error *local_err = NULL;
1661
1662    if (xen_enabled()) {
1663        error_setg(errp, "-mem-path not supported with Xen");
1664        return NULL;
1665    }
1666
1667    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1668        /*
1669         * file_ram_alloc() needs to allocate just like
1670         * phys_mem_alloc, but we haven't bothered to provide
1671         * a hook there.
1672         */
1673        error_setg(errp,
1674                   "-mem-path not supported with this accelerator");
1675        return NULL;
1676    }
1677
1678    size = HOST_PAGE_ALIGN(size);
1679    new_block = g_malloc0(sizeof(*new_block));
1680    new_block->mr = mr;
1681    new_block->used_length = size;
1682    new_block->max_length = size;
1683    new_block->flags = share ? RAM_SHARED : 0;
1684    new_block->host = file_ram_alloc(new_block, size,
1685                                     mem_path, errp);
1686    if (!new_block->host) {
1687        g_free(new_block);
1688        return NULL;
1689    }
1690
1691    ram_block_add(new_block, &local_err);
1692    if (local_err) {
1693        g_free(new_block);
1694        error_propagate(errp, local_err);
1695        return NULL;
1696    }
1697    return new_block;
1698}
1699#endif
1700
1701static
1702RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1703                                  void (*resized)(const char*,
1704                                                  uint64_t length,
1705                                                  void *host),
1706                                  void *host, bool resizeable,
1707                                  MemoryRegion *mr, Error **errp)
1708{
1709    RAMBlock *new_block;
1710    Error *local_err = NULL;
1711
1712    size = HOST_PAGE_ALIGN(size);
1713    max_size = HOST_PAGE_ALIGN(max_size);
1714    new_block = g_malloc0(sizeof(*new_block));
1715    new_block->mr = mr;
1716    new_block->resized = resized;
1717    new_block->used_length = size;
1718    new_block->max_length = max_size;
1719    assert(max_size >= size);
1720    new_block->fd = -1;
1721    new_block->host = host;
1722    if (host) {
1723        new_block->flags |= RAM_PREALLOC;
1724    }
1725    if (resizeable) {
1726        new_block->flags |= RAM_RESIZEABLE;
1727    }
1728    ram_block_add(new_block, &local_err);
1729    if (local_err) {
1730        g_free(new_block);
1731        error_propagate(errp, local_err);
1732        return NULL;
1733    }
1734    return new_block;
1735}
1736
1737RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1738                                   MemoryRegion *mr, Error **errp)
1739{
1740    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1741}
1742
1743RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1744{
1745    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1746}
1747
1748RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1749                                     void (*resized)(const char*,
1750                                                     uint64_t length,
1751                                                     void *host),
1752                                     MemoryRegion *mr, Error **errp)
1753{
1754    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1755}
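
    /*
     * Illustrative sketch (not part of this file): the three wrappers above
     * all funnel into qemu_ram_alloc_internal().  A resizeable block gets an
     * initial used_length, a larger max_length and a callback invoked when
     * the used size changes; callers typically go through
     * memory_region_init_resizeable_ram().  The names below are made up for
     * the example.
     *
     *     static void my_ram_resized(const char *id, uint64_t new_len,
     *                                void *host)
     *     {
     *         // react to the new used_length
     *     }
     *
     *     block = qemu_ram_alloc_resizeable(16 * 1024 * 1024,
     *                                       64 * 1024 * 1024,
     *                                       my_ram_resized, mr, &err);
     */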
1756
1757static void reclaim_ramblock(RAMBlock *block)
1758{
1759    if (block->flags & RAM_PREALLOC) {
1760        ;
1761    } else if (xen_enabled()) {
1762        xen_invalidate_map_cache_entry(block->host);
1763#ifndef _WIN32
1764    } else if (block->fd >= 0) {
1765        qemu_ram_munmap(block->host, block->max_length);
1766        close(block->fd);
1767#endif
1768    } else {
1769        qemu_anon_ram_free(block->host, block->max_length);
1770    }
1771    g_free(block);
1772}
1773
1774void qemu_ram_free(RAMBlock *block)
1775{
1776    if (!block) {
1777        return;
1778    }
1779
1780    qemu_mutex_lock_ramlist();
1781    QLIST_REMOVE_RCU(block, next);
1782    ram_list.mru_block = NULL;
1783    /* Write list before version */
1784    smp_wmb();
1785    ram_list.version++;
1786    call_rcu(block, reclaim_ramblock, rcu);
1787    qemu_mutex_unlock_ramlist();
1788}
1789
1790#ifndef _WIN32
1791void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1792{
1793    RAMBlock *block;
1794    ram_addr_t offset;
1795    int flags;
1796    void *area, *vaddr;
1797
1798    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1799        offset = addr - block->offset;
1800        if (offset < block->max_length) {
1801            vaddr = ramblock_ptr(block, offset);
1802            if (block->flags & RAM_PREALLOC) {
1803                ;
1804            } else if (xen_enabled()) {
1805                abort();
1806            } else {
1807                flags = MAP_FIXED;
1808                if (block->fd >= 0) {
1809                    flags |= (block->flags & RAM_SHARED ?
1810                              MAP_SHARED : MAP_PRIVATE);
1811                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1812                                flags, block->fd, offset);
1813                } else {
1814                    /*
1815                     * Remap needs to match alloc.  Accelerators that
1816                     * set phys_mem_alloc never remap.  If they did,
1817                     * we'd need a remap hook here.
1818                     */
1819                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1820
1821                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1822                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1823                                flags, -1, 0);
1824                }
1825                if (area != vaddr) {
1826                    fprintf(stderr, "Could not remap addr: "
1827                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1828                            length, addr);
1829                    exit(1);
1830                }
1831                memory_try_enable_merging(vaddr, length);
1832                qemu_ram_setup_dump(vaddr, length);
1833            }
1834        }
1835    }
1836}
1837#endif /* !_WIN32 */
1838
1839int qemu_get_ram_fd(ram_addr_t addr)
1840{
1841    RAMBlock *block;
1842    int fd;
1843
1844    rcu_read_lock();
1845    block = qemu_get_ram_block(addr);
1846    fd = block->fd;
1847    rcu_read_unlock();
1848    return fd;
1849}
1850
1851void qemu_set_ram_fd(ram_addr_t addr, int fd)
1852{
1853    RAMBlock *block;
1854
1855    rcu_read_lock();
1856    block = qemu_get_ram_block(addr);
1857    block->fd = fd;
1858    rcu_read_unlock();
1859}
1860
1861void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1862{
1863    RAMBlock *block;
1864    void *ptr;
1865
1866    rcu_read_lock();
1867    block = qemu_get_ram_block(addr);
1868    ptr = ramblock_ptr(block, 0);
1869    rcu_read_unlock();
1870    return ptr;
1871}
1872
1873/* Return a host pointer to ram allocated with qemu_ram_alloc.
1874 * This should not be used for general purpose DMA.  Use address_space_map
1875 * or address_space_rw instead. For local memory (e.g. video ram) that the
1876 * device owns, use memory_region_get_ram_ptr.
1877 *
1878 * Called within RCU critical section.
1879 */
1880void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1881{
1882    RAMBlock *block = ram_block;
1883
1884    if (block == NULL) {
1885        block = qemu_get_ram_block(addr);
1886    }
1887
1888    if (xen_enabled() && block->host == NULL) {
1889        /* We need to check if the requested address is in the RAM
1890         * because we don't want to map the entire memory in QEMU.
1891         * In that case just map until the end of the page.
1892         */
1893        if (block->offset == 0) {
1894            return xen_map_cache(addr, 0, 0);
1895        }
1896
1897        block->host = xen_map_cache(block->offset, block->max_length, 1);
1898    }
1899    return ramblock_ptr(block, addr - block->offset);
1900}
1901
1902/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1903 * but takes a size argument.
1904 *
1905 * Called within RCU critical section.
1906 */
1907static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1908                                 hwaddr *size)
1909{
1910    RAMBlock *block = ram_block;
1911    ram_addr_t offset_inside_block;
1912    if (*size == 0) {
1913        return NULL;
1914    }
1915
1916    if (block == NULL) {
1917        block = qemu_get_ram_block(addr);
1918    }
1919    offset_inside_block = addr - block->offset;
1920    *size = MIN(*size, block->max_length - offset_inside_block);
1921
1922    if (xen_enabled() && block->host == NULL) {
1923        /* We need to check if the requested address is in the RAM
1924         * because we don't want to map the entire memory in QEMU.
1925         * In that case just map the requested area.
1926         */
1927        if (block->offset == 0) {
1928            return xen_map_cache(addr, *size, 1);
1929        }
1930
1931        block->host = xen_map_cache(block->offset, block->max_length, 1);
1932    }
1933
1934    return ramblock_ptr(block, offset_inside_block);
1935}
1936
1937/*
1938 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1939 * in that RAMBlock.
1940 *
1941 * ptr: Host pointer to look up
1942 * round_offset: If true, round the result offset down to a page boundary
1943 * *ram_addr: set to result ram_addr
1944 * *offset: set to result offset within the RAMBlock
1945 *
1946 * Returns: RAMBlock (or NULL if not found)
1947 *
1948 * By the time this function returns, the returned pointer is not protected
1949 * by RCU anymore.  If the caller is not within an RCU critical section and
1950 * does not hold the iothread lock, it must have other means of protecting the
1951 * pointer, such as a reference to the region that includes the incoming
1952 * ram_addr_t.
1953 */
1954RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1955                                   ram_addr_t *ram_addr,
1956                                   ram_addr_t *offset)
1957{
1958    RAMBlock *block;
1959    uint8_t *host = ptr;
1960
1961    if (xen_enabled()) {
1962        rcu_read_lock();
1963        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1964        block = qemu_get_ram_block(*ram_addr);
1965        if (block) {
1966            *offset = (host - block->host);
1967        }
1968        rcu_read_unlock();
1969        return block;
1970    }
1971
1972    rcu_read_lock();
1973    block = atomic_rcu_read(&ram_list.mru_block);
1974    if (block && block->host && host - block->host < block->max_length) {
1975        goto found;
1976    }
1977
1978    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1979        /* This case happens when the block is not mapped. */
1980        if (block->host == NULL) {
1981            continue;
1982        }
1983        if (host - block->host < block->max_length) {
1984            goto found;
1985        }
1986    }
1987
1988    rcu_read_unlock();
1989    return NULL;
1990
1991found:
1992    *offset = (host - block->host);
1993    if (round_offset) {
1994        *offset &= TARGET_PAGE_MASK;
1995    }
1996    *ram_addr = block->offset + *offset;
1997    rcu_read_unlock();
1998    return block;
1999}
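
    /*
     * Illustrative sketch (not part of this file): translating a host
     * pointer known to point into guest RAM back to its RAMBlock and
     * ram_addr.  Callers outside an RCU critical section must keep the
     * block alive by other means, as described above.
     *
     *     ram_addr_t ram_addr, offset;
     *     RAMBlock *rb;
     *
     *     rcu_read_lock();
     *     rb = qemu_ram_block_from_host(host_ptr, false, &ram_addr, &offset);
     *     if (rb) {
     *         // ram_addr == rb->offset + offset identifies the guest page
     *     }
     *     rcu_read_unlock();
     */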
2000
2001/*
2002 * Finds the named RAMBlock
2003 *
2004 * name: The name of RAMBlock to find
2005 *
2006 * Returns: RAMBlock (or NULL if not found)
2007 */
2008RAMBlock *qemu_ram_block_by_name(const char *name)
2009{
2010    RAMBlock *block;
2011
2012    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2013        if (!strcmp(name, block->idstr)) {
2014            return block;
2015        }
2016    }
2017
2018    return NULL;
2019}
2020
2021/* Some of the softmmu routines need to translate from a host pointer
2022   (typically a TLB entry) back to a ram offset.  */
2023MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2024{
2025    RAMBlock *block;
2026    ram_addr_t offset; /* Not used */
2027
2028    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2029
2030    if (!block) {
2031        return NULL;
2032    }
2033
2034    return block->mr;
2035}
2036
2037/* Called within RCU critical section.  */
2038static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2039                               uint64_t val, unsigned size)
2040{
2041    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2042        tb_invalidate_phys_page_fast(ram_addr, size);
2043    }
2044    switch (size) {
2045    case 1:
2046        stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2047        break;
2048    case 2:
2049        stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2050        break;
2051    case 4:
2052        stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2053        break;
2054    default:
2055        abort();
2056    }
2057    /* Set both VGA and migration bits for simplicity and to remove
2058     * the notdirty callback faster.
2059     */
2060    cpu_physical_memory_set_dirty_range(ram_addr, size,
2061                                        DIRTY_CLIENTS_NOCODE);
2062    /* we remove the notdirty callback only if the code has been
2063       flushed */
2064    if (!cpu_physical_memory_is_clean(ram_addr)) {
2065        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2066    }
2067}
2068
2069static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2070                                 unsigned size, bool is_write)
2071{
2072    return is_write;
2073}
2074
2075static const MemoryRegionOps notdirty_mem_ops = {
2076    .write = notdirty_mem_write,
2077    .valid.accepts = notdirty_mem_accepts,
2078    .endianness = DEVICE_NATIVE_ENDIAN,
2079};
2080
2081/* Generate a debug exception if a watchpoint has been hit.  */
2082static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2083{
2084    CPUState *cpu = current_cpu;
2085    CPUClass *cc = CPU_GET_CLASS(cpu);
2086    CPUArchState *env = cpu->env_ptr;
2087    target_ulong pc, cs_base;
2088    target_ulong vaddr;
2089    CPUWatchpoint *wp;
2090    int cpu_flags;
2091
2092    if (cpu->watchpoint_hit) {
2093        /* We re-entered the check after replacing the TB. Now raise
2094         * the debug interrupt so that it will trigger after the
2095         * current instruction. */
2096        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2097        return;
2098    }
2099    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2100    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2101        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2102            && (wp->flags & flags)) {
2103            if (flags == BP_MEM_READ) {
2104                wp->flags |= BP_WATCHPOINT_HIT_READ;
2105            } else {
2106                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2107            }
2108            wp->hitaddr = vaddr;
2109            wp->hitattrs = attrs;
2110            if (!cpu->watchpoint_hit) {
2111                if (wp->flags & BP_CPU &&
2112                    !cc->debug_check_watchpoint(cpu, wp)) {
2113                    wp->flags &= ~BP_WATCHPOINT_HIT;
2114                    continue;
2115                }
2116                cpu->watchpoint_hit = wp;
2117                tb_check_watchpoint(cpu);
2118                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2119                    cpu->exception_index = EXCP_DEBUG;
2120                    cpu_loop_exit(cpu);
2121                } else {
2122                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2123                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2124                    cpu_resume_from_signal(cpu, NULL);
2125                }
2126            }
2127        } else {
2128            wp->flags &= ~BP_WATCHPOINT_HIT;
2129        }
2130    }
2131}
2132
2133/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2134   so these check for a hit then pass through to the normal out-of-line
2135   phys routines.  */
2136static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2137                                  unsigned size, MemTxAttrs attrs)
2138{
2139    MemTxResult res;
2140    uint64_t data;
2141    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2142    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2143
2144    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2145    switch (size) {
2146    case 1:
2147        data = address_space_ldub(as, addr, attrs, &res);
2148        break;
2149    case 2:
2150        data = address_space_lduw(as, addr, attrs, &res);
2151        break;
2152    case 4:
2153        data = address_space_ldl(as, addr, attrs, &res);
2154        break;
2155    default: abort();
2156    }
2157    *pdata = data;
2158    return res;
2159}
2160
2161static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2162                                   uint64_t val, unsigned size,
2163                                   MemTxAttrs attrs)
2164{
2165    MemTxResult res;
2166    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2167    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2168
2169    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2170    switch (size) {
2171    case 1:
2172        address_space_stb(as, addr, val, attrs, &res);
2173        break;
2174    case 2:
2175        address_space_stw(as, addr, val, attrs, &res);
2176        break;
2177    case 4:
2178        address_space_stl(as, addr, val, attrs, &res);
2179        break;
2180    default: abort();
2181    }
2182    return res;
2183}
2184
2185static const MemoryRegionOps watch_mem_ops = {
2186    .read_with_attrs = watch_mem_read,
2187    .write_with_attrs = watch_mem_write,
2188    .endianness = DEVICE_NATIVE_ENDIAN,
2189};
2190
2191static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2192                                unsigned len, MemTxAttrs attrs)
2193{
2194    subpage_t *subpage = opaque;
2195    uint8_t buf[8];
2196    MemTxResult res;
2197
2198#if defined(DEBUG_SUBPAGE)
2199    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2200           subpage, len, addr);
2201#endif
2202    res = address_space_read(subpage->as, addr + subpage->base,
2203                             attrs, buf, len);
2204    if (res) {
2205        return res;
2206    }
2207    switch (len) {
2208    case 1:
2209        *data = ldub_p(buf);
2210        return MEMTX_OK;
2211    case 2:
2212        *data = lduw_p(buf);
2213        return MEMTX_OK;
2214    case 4:
2215        *data = ldl_p(buf);
2216        return MEMTX_OK;
2217    case 8:
2218        *data = ldq_p(buf);
2219        return MEMTX_OK;
2220    default:
2221        abort();
2222    }
2223}
2224
2225static MemTxResult subpage_write(void *opaque, hwaddr addr,
2226                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2227{
2228    subpage_t *subpage = opaque;
2229    uint8_t buf[8];
2230
2231#if defined(DEBUG_SUBPAGE)
2232    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2233           " value %"PRIx64"\n",
2234           __func__, subpage, len, addr, value);
2235#endif
2236    switch (len) {
2237    case 1:
2238        stb_p(buf, value);
2239        break;
2240    case 2:
2241        stw_p(buf, value);
2242        break;
2243    case 4:
2244        stl_p(buf, value);
2245        break;
2246    case 8:
2247        stq_p(buf, value);
2248        break;
2249    default:
2250        abort();
2251    }
2252    return address_space_write(subpage->as, addr + subpage->base,
2253                               attrs, buf, len);
2254}
2255
2256static bool subpage_accepts(void *opaque, hwaddr addr,
2257                            unsigned len, bool is_write)
2258{
2259    subpage_t *subpage = opaque;
2260#if defined(DEBUG_SUBPAGE)
2261    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2262           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2263#endif
2264
2265    return address_space_access_valid(subpage->as, addr + subpage->base,
2266                                      len, is_write);
2267}
2268
2269static const MemoryRegionOps subpage_ops = {
2270    .read_with_attrs = subpage_read,
2271    .write_with_attrs = subpage_write,
2272    .impl.min_access_size = 1,
2273    .impl.max_access_size = 8,
2274    .valid.min_access_size = 1,
2275    .valid.max_access_size = 8,
2276    .valid.accepts = subpage_accepts,
2277    .endianness = DEVICE_NATIVE_ENDIAN,
2278};
2279
2280static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2281                             uint16_t section)
2282{
2283    int idx, eidx;
2284
2285    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2286        return -1;
2287    idx = SUBPAGE_IDX(start);
2288    eidx = SUBPAGE_IDX(end);
2289#if defined(DEBUG_SUBPAGE)
2290    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2291           __func__, mmio, start, end, idx, eidx, section);
2292#endif
2293    for (; idx <= eidx; idx++) {
2294        mmio->sub_section[idx] = section;
2295    }
2296
2297    return 0;
2298}
2299
2300static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2301{
2302    subpage_t *mmio;
2303
2304    mmio = g_malloc0(sizeof(subpage_t));
2305
2306    mmio->as = as;
2307    mmio->base = base;
2308    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2309                          NULL, TARGET_PAGE_SIZE);
2310    mmio->iomem.subpage = true;
2311#if defined(DEBUG_SUBPAGE)
2312    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2313           mmio, base, TARGET_PAGE_SIZE);
2314#endif
2315    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2316
2317    return mmio;
2318}
2319
2320static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2321                              MemoryRegion *mr)
2322{
2323    assert(as);
2324    MemoryRegionSection section = {
2325        .address_space = as,
2326        .mr = mr,
2327        .offset_within_address_space = 0,
2328        .offset_within_region = 0,
2329        .size = int128_2_64(),
2330    };
2331
2332    return phys_section_add(map, &section);
2333}
2334
2335MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2336{
2337    int asidx = cpu_asidx_from_attrs(cpu, attrs);
2338    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2339    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2340    MemoryRegionSection *sections = d->map.sections;
2341
2342    return sections[index & ~TARGET_PAGE_MASK].mr;
2343}
2344
2345static void io_mem_init(void)
2346{
2347    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL,
                              NULL, UINT64_MAX);
2348    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2349                          NULL, UINT64_MAX);
2350    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2351                          NULL, UINT64_MAX);
2352    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2353                          NULL, UINT64_MAX);
2354}
2355
2356static void mem_begin(MemoryListener *listener)
2357{
2358    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2359    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2360    uint16_t n;
2361
2362    n = dummy_section(&d->map, as, &io_mem_unassigned);
2363    assert(n == PHYS_SECTION_UNASSIGNED);
2364    n = dummy_section(&d->map, as, &io_mem_notdirty);
2365    assert(n == PHYS_SECTION_NOTDIRTY);
2366    n = dummy_section(&d->map, as, &io_mem_rom);
2367    assert(n == PHYS_SECTION_ROM);
2368    n = dummy_section(&d->map, as, &io_mem_watch);
2369    assert(n == PHYS_SECTION_WATCH);
2370
2371    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2372    d->as = as;
2373    as->next_dispatch = d;
2374}
2375
2376static void address_space_dispatch_free(AddressSpaceDispatch *d)
2377{
2378    phys_sections_free(&d->map);
2379    g_free(d);
2380}
2381
2382static void mem_commit(MemoryListener *listener)
2383{
2384    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2385    AddressSpaceDispatch *cur = as->dispatch;
2386    AddressSpaceDispatch *next = as->next_dispatch;
2387
2388    phys_page_compact_all(next, next->map.nodes_nb);
2389
2390    atomic_rcu_set(&as->dispatch, next);
2391    if (cur) {
2392        call_rcu(cur, address_space_dispatch_free, rcu);
2393    }
2394}
2395
2396static void tcg_commit(MemoryListener *listener)
2397{
2398    CPUAddressSpace *cpuas;
2399    AddressSpaceDispatch *d;
2400
2401    /* since each CPU stores ram addresses in its TLB cache, we must
2402       reset the modified entries */
2403    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2404    cpu_reloading_memory_map();
2405    /* The CPU and TLB are protected by the iothread lock.
2406     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2407     * may have split the RCU critical section.
2408     */
2409    d = atomic_rcu_read(&cpuas->as->dispatch);
2410    cpuas->memory_dispatch = d;
2411    tlb_flush(cpuas->cpu, 1);
2412}
2413
2414void address_space_init_dispatch(AddressSpace *as)
2415{
2416    as->dispatch = NULL;
2417    as->dispatch_listener = (MemoryListener) {
2418        .begin = mem_begin,
2419        .commit = mem_commit,
2420        .region_add = mem_add,
2421        .region_nop = mem_add,
2422        .priority = 0,
2423    };
2424    memory_listener_register(&as->dispatch_listener, as);
2425}
2426
2427void address_space_unregister(AddressSpace *as)
2428{
2429    memory_listener_unregister(&as->dispatch_listener);
2430}
2431
2432void address_space_destroy_dispatch(AddressSpace *as)
2433{
2434    AddressSpaceDispatch *d = as->dispatch;
2435
2436    atomic_rcu_set(&as->dispatch, NULL);
2437    if (d) {
2438        call_rcu(d, address_space_dispatch_free, rcu);
2439    }
2440}
2441
2442static void memory_map_init(void)
2443{
2444    system_memory = g_malloc(sizeof(*system_memory));
2445
2446    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2447    address_space_init(&address_space_memory, system_memory, "memory");
2448
2449    system_io = g_malloc(sizeof(*system_io));
2450    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2451                          65536);
2452    address_space_init(&address_space_io, system_io, "I/O");
2453}
2454
2455MemoryRegion *get_system_memory(void)
2456{
2457    return system_memory;
2458}
2459
2460MemoryRegion *get_system_io(void)
2461{
2462    return system_io;
2463}
2464
2465#endif /* !defined(CONFIG_USER_ONLY) */
2466
2467/* physical memory access (slow version, mainly for debug) */
2468#if defined(CONFIG_USER_ONLY)
2469int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2470                        uint8_t *buf, int len, int is_write)
2471{
2472    int l, flags;
2473    target_ulong page;
2474    void * p;
2475
2476    while (len > 0) {
2477        page = addr & TARGET_PAGE_MASK;
2478        l = (page + TARGET_PAGE_SIZE) - addr;
2479        if (l > len)
2480            l = len;
2481        flags = page_get_flags(page);
2482        if (!(flags & PAGE_VALID))
2483            return -1;
2484        if (is_write) {
2485            if (!(flags & PAGE_WRITE))
2486                return -1;
2487            /* XXX: this code should not depend on lock_user */
2488            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2489                return -1;
2490            memcpy(p, buf, l);
2491            unlock_user(p, addr, l);
2492        } else {
2493            if (!(flags & PAGE_READ))
2494                return -1;
2495            /* XXX: this code should not depend on lock_user */
2496            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2497                return -1;
2498            memcpy(buf, p, l);
2499            unlock_user(p, addr, 0);
2500        }
2501        len -= l;
2502        buf += l;
2503        addr += l;
2504    }
2505    return 0;
2506}
2507
2508#else
2509
2510static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2511                                     hwaddr length)
2512{
2513    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2514    /* No early return if dirty_log_mask is or becomes 0, because
2515     * cpu_physical_memory_set_dirty_range will still call
2516     * xen_modified_memory.
2517     */
2518    if (dirty_log_mask) {
2519        dirty_log_mask =
2520            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2521    }
2522    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2523        tb_invalidate_phys_range(addr, addr + length);
2524        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2525    }
2526    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2527}
2528
2529static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2530{
2531    unsigned access_size_max = mr->ops->valid.max_access_size;
2532
2533    /* Regions are assumed to support 1-4 byte accesses unless
2534       otherwise specified.  */
2535    if (access_size_max == 0) {
2536        access_size_max = 4;
2537    }
2538
2539    /* Bound the maximum access by the alignment of the address.  */
2540    if (!mr->ops->impl.unaligned) {
2541        unsigned align_size_max = addr & -addr;
2542        if (align_size_max != 0 && align_size_max < access_size_max) {
2543            access_size_max = align_size_max;
2544        }
2545    }
2546
2547    /* Don't attempt accesses larger than the maximum.  */
2548    if (l > access_size_max) {
2549        l = access_size_max;
2550    }
2551    l = pow2floor(l);
2552
2553    return l;
2554}
2555
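    /* Grab the global iothread lock if the region requires it and the caller
     * does not already hold it, and flush any coalesced MMIO before the
     * device is touched.  Returns true if the caller must drop the lock
     * again once the access is done.
     */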
2556static bool prepare_mmio_access(MemoryRegion *mr)
2557{
2558    bool unlocked = !qemu_mutex_iothread_locked();
2559    bool release_lock = false;
2560
2561    if (unlocked && mr->global_locking) {
2562        qemu_mutex_lock_iothread();
2563        unlocked = false;
2564        release_lock = true;
2565    }
2566    if (mr->flush_coalesced_mmio) {
2567        if (unlocked) {
2568            qemu_mutex_lock_iothread();
2569        }
2570        qemu_flush_coalesced_mmio_buffer();
2571        if (unlocked) {
2572            qemu_mutex_unlock_iothread();
2573        }
2574    }
2575
2576    return release_lock;
2577}
2578
2579/* Called within RCU critical section.  */
2580static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2581                                                MemTxAttrs attrs,
2582                                                const uint8_t *buf,
2583                                                int len, hwaddr addr1,
2584                                                hwaddr l, MemoryRegion *mr)
2585{
2586    uint8_t *ptr;
2587    uint64_t val;
2588    MemTxResult result = MEMTX_OK;
2589    bool release_lock = false;
2590
2591    for (;;) {
2592        if (!memory_access_is_direct(mr, true)) {
2593            release_lock |= prepare_mmio_access(mr);
2594            l = memory_access_size(mr, l, addr1);
2595            /* XXX: could force current_cpu to NULL to avoid
2596               potential bugs */
2597            switch (l) {
2598            case 8:
2599                /* 64 bit write access */
2600                val = ldq_p(buf);
2601                result |= memory_region_dispatch_write(mr, addr1, val, 8,
2602                                                       attrs);
2603                break;
2604            case 4:
2605                /* 32 bit write access */
2606                val = ldl_p(buf);
2607                result |= memory_region_dispatch_write(mr, addr1, val, 4,
2608                                                       attrs);
2609                break;
2610            case 2:
2611                /* 16 bit write access */
2612                val = lduw_p(buf);
2613                result |= memory_region_dispatch_write(mr, addr1, val, 2,
2614                                                       attrs);
2615                break;
2616            case 1:
2617                /* 8 bit write access */
2618                val = ldub_p(buf);
2619                result |= memory_region_dispatch_write(mr, addr1, val, 1,
2620                                                       attrs);
2621                break;
2622            default:
2623                abort();
2624            }
2625        } else {
2626            addr1 += memory_region_get_ram_addr(mr);
2627            /* RAM case */
2628            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2629            memcpy(ptr, buf, l);
2630            invalidate_and_set_dirty(mr, addr1, l);
2631        }
2632
2633        if (release_lock) {
2634            qemu_mutex_unlock_iothread();
2635            release_lock = false;
2636        }
2637
2638        len -= l;
2639        buf += l;
2640        addr += l;
2641
2642        if (!len) {
2643            break;
2644        }
2645
2646        l = len;
2647        mr = address_space_translate(as, addr, &addr1, &l, true);
2648    }
2649
2650    return result;
2651}
2652
2653MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2654                                const uint8_t *buf, int len)
2655{
2656    hwaddr l;
2657    hwaddr addr1;
2658    MemoryRegion *mr;
2659    MemTxResult result = MEMTX_OK;
2660
2661    if (len > 0) {
2662        rcu_read_lock();
2663        l = len;
2664        mr = address_space_translate(as, addr, &addr1, &l, true);
2665        result = address_space_write_continue(as, addr, attrs, buf, len,
2666                                              addr1, l, mr);
2667        rcu_read_unlock();
2668    }
2669
2670    return result;
2671}
2672
2673/* Called within RCU critical section.  */
2674MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2675                                        MemTxAttrs attrs, uint8_t *buf,
2676                                        int len, hwaddr addr1, hwaddr l,
2677                                        MemoryRegion *mr)
2678{
2679    uint8_t *ptr;
2680    uint64_t val;
2681    MemTxResult result = MEMTX_OK;
2682    bool release_lock = false;
2683
2684    for (;;) {
2685        if (!memory_access_is_direct(mr, false)) {
2686            /* I/O case */
2687            release_lock |= prepare_mmio_access(mr);
2688            l = memory_access_size(mr, l, addr1);
2689            switch (l) {
2690            case 8:
2691                /* 64 bit read access */
2692                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2693                                                      attrs);
2694                stq_p(buf, val);
2695                break;
2696            case 4:
2697                /* 32 bit read access */
2698                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2699                                                      attrs);
2700                stl_p(buf, val);
2701                break;
2702            case 2:
2703                /* 16 bit read access */
2704                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2705                                                      attrs);
2706                stw_p(buf, val);
2707                break;
2708            case 1:
2709                /* 8 bit read access */
2710                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2711                                                      attrs);
2712                stb_p(buf, val);
2713                break;
2714            default:
2715                abort();
2716            }
2717        } else {
2718            /* RAM case */
2719            ptr = qemu_get_ram_ptr(mr->ram_block,
2720                                   memory_region_get_ram_addr(mr) + addr1);
2721            memcpy(buf, ptr, l);
2722        }
2723
2724        if (release_lock) {
2725            qemu_mutex_unlock_iothread();
2726            release_lock = false;
2727        }
2728
2729        len -= l;
2730        buf += l;
2731        addr += l;
2732
2733        if (!len) {
2734            break;
2735        }
2736
2737        l = len;
2738        mr = address_space_translate(as, addr, &addr1, &l, false);
2739    }
2740
2741    return result;
2742}
2743
2744MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2745                                    MemTxAttrs attrs, uint8_t *buf, int len)
2746{
2747    hwaddr l;
2748    hwaddr addr1;
2749    MemoryRegion *mr;
2750    MemTxResult result = MEMTX_OK;
2751
2752    if (len > 0) {
2753        rcu_read_lock();
2754        l = len;
2755        mr = address_space_translate(as, addr, &addr1, &l, false);
2756        result = address_space_read_continue(as, addr, attrs, buf, len,
2757                                             addr1, l, mr);
2758        rcu_read_unlock();
2759    }
2760
2761    return result;
2762}
2763
2764MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2765                             uint8_t *buf, int len, bool is_write)
2766{
2767    if (is_write) {
2768        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2769    } else {
2770        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2771    }
2772}
2773
2774void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2775                            int len, int is_write)
2776{
2777    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2778                     buf, len, is_write);
2779}
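
    /*
     * Illustrative sketch (not part of this file): how device code commonly
     * copies data to and from guest-physical memory.  "gpa" is a placeholder
     * guest physical address.
     *
     *     uint8_t buf[64];
     *
     *     // read 64 bytes from the guest into buf
     *     cpu_physical_memory_rw(gpa, buf, sizeof(buf), 0);
     *
     *     // write them back, with explicit address space and attributes
     *     MemTxResult r = address_space_rw(&address_space_memory, gpa,
     *                                      MEMTXATTRS_UNSPECIFIED, buf,
     *                                      sizeof(buf), true);
     *     if (r != MEMTX_OK) {
     *         // part of the access hit unassigned memory or faulted
     *     }
     */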
2780
2781enum write_rom_type {
2782    WRITE_DATA,
2783    FLUSH_CACHE,
2784};
2785
2786static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2787    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2788{
2789    hwaddr l;
2790    uint8_t *ptr;
2791    hwaddr addr1;
2792    MemoryRegion *mr;
2793
2794    rcu_read_lock();
2795    while (len > 0) {
2796        l = len;
2797        mr = address_space_translate(as, addr, &addr1, &l, true);
2798
2799        if (!(memory_region_is_ram(mr) ||
2800              memory_region_is_romd(mr))) {
2801            l = memory_access_size(mr, l, addr1);
2802        } else {
2803            addr1 += memory_region_get_ram_addr(mr);
2804            /* ROM/RAM case */
2805            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2806            switch (type) {
2807            case WRITE_DATA:
2808                memcpy(ptr, buf, l);
2809                invalidate_and_set_dirty(mr, addr1, l);
2810                break;
2811            case FLUSH_CACHE:
2812                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2813                break;
2814            }
2815        }
2816        len -= l;
2817        buf += l;
2818        addr += l;
2819    }
2820    rcu_read_unlock();
2821}
2822
2823/* Used for ROM loading: can write to both RAM and ROM. */
2824void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2825                                   const uint8_t *buf, int len)
2826{
2827    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2828}
2829
2830void cpu_flush_icache_range(hwaddr start, int len)
2831{
2832    /*
2833     * This function should do the same thing as an icache flush that was
2834     * triggered from within the guest. For TCG we are always cache coherent,
2835     * so there is no need to flush anything. For KVM / Xen we need to flush
2836     * the host's instruction cache at least.
2837     */
2838    if (tcg_enabled()) {
2839        return;
2840    }
2841
2842    cpu_physical_memory_write_rom_internal(&address_space_memory,
2843                                           start, NULL, len, FLUSH_CACHE);
2844}
2845
2846typedef struct {
2847    MemoryRegion *mr;
2848    void *buffer;
2849    hwaddr addr;
2850    hwaddr len;
2851    bool in_use;
2852} BounceBuffer;
2853
2854static BounceBuffer bounce;
2855
2856typedef struct MapClient {
2857    QEMUBH *bh;
2858    QLIST_ENTRY(MapClient) link;
2859} MapClient;
2860
2861QemuMutex map_client_list_lock;
2862static QLIST_HEAD(map_client_list, MapClient) map_client_list
2863    = QLIST_HEAD_INITIALIZER(map_client_list);
2864
2865static void cpu_unregister_map_client_do(MapClient *client)
2866{
2867    QLIST_REMOVE(client, link);
2868    g_free(client);
2869}
2870
2871static void cpu_notify_map_clients_locked(void)
2872{
2873    MapClient *client;
2874
2875    while (!QLIST_EMPTY(&map_client_list)) {
2876        client = QLIST_FIRST(&map_client_list);
2877        qemu_bh_schedule(client->bh);
2878        cpu_unregister_map_client_do(client);
2879    }
2880}
2881
2882void cpu_register_map_client(QEMUBH *bh)
2883{
2884    MapClient *client = g_malloc(sizeof(*client));
2885
2886    qemu_mutex_lock(&map_client_list_lock);
2887    client->bh = bh;
2888    QLIST_INSERT_HEAD(&map_client_list, client, link);
2889    if (!atomic_read(&bounce.in_use)) {
2890        cpu_notify_map_clients_locked();
2891    }
2892    qemu_mutex_unlock(&map_client_list_lock);
2893}
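
    /*
     * Illustrative sketch (not part of this file): retrying a failed
     * address_space_map().  When the single bounce buffer is in use the map
     * returns NULL; the caller can register a bottom half that is scheduled
     * once the buffer is released and then retry.  The callback and opaque
     * below are made up for the example.
     *
     *     static void my_dma_retry(void *opaque)
     *     {
     *         // runs in the main loop once a new mapping may succeed
     *     }
     *
     *     QEMUBH *bh = qemu_bh_new(my_dma_retry, my_state);
     *     cpu_register_map_client(bh);
     */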
2894
2895void cpu_exec_init_all(void)
2896{
2897    qemu_mutex_init(&ram_list.mutex);
2898    io_mem_init();
2899    memory_map_init();
2900    qemu_mutex_init(&map_client_list_lock);
2901}
2902
2903void cpu_unregister_map_client(QEMUBH *bh)
2904{
2905    MapClient *client;
2906
2907    qemu_mutex_lock(&map_client_list_lock);
2908    QLIST_FOREACH(client, &map_client_list, link) {
2909        if (client->bh == bh) {
2910            cpu_unregister_map_client_do(client);
2911            break;
2912        }
2913    }
2914    qemu_mutex_unlock(&map_client_list_lock);
2915}
2916
2917static void cpu_notify_map_clients(void)
2918{
2919    qemu_mutex_lock(&map_client_list_lock);
2920    cpu_notify_map_clients_locked();
2921    qemu_mutex_unlock(&map_client_list_lock);
2922}
2923
2924bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2925{
2926    MemoryRegion *mr;
2927    hwaddr l, xlat;
2928
2929    rcu_read_lock();
2930    while (len > 0) {
2931        l = len;
2932        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2933        if (!memory_access_is_direct(mr, is_write)) {
2934            l = memory_access_size(mr, l, addr);
2935            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                    /* Do not return with the RCU read lock still held. */
                    rcu_read_unlock();
2936                return false;
2937            }
2938        }
2939
2940        len -= l;
2941        addr += l;
2942    }
2943    rcu_read_unlock();
2944    return true;
2945}
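
    /*
     * Illustrative sketch (not part of this file): a device model can probe
     * whether a DMA window is accessible before starting the transfer.
     *
     *     if (!address_space_access_valid(&address_space_memory, gpa, len,
     *                                     true)) {
     *         // signal a DMA error to the guest instead of writing
     *     }
     */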
2946
2947/* Map a physical memory region into a host virtual address.
2948 * May map a subset of the requested range, given by and returned in *plen.
2949 * May return NULL if resources needed to perform the mapping are exhausted.
2950 * Use only for reads OR writes - not for read-modify-write operations.
2951 * Use cpu_register_map_client() to know when retrying the map operation is
2952 * likely to succeed.
2953 */
2954void *address_space_map(AddressSpace *as,
2955                        hwaddr addr,
2956                        hwaddr *plen,
2957                        bool is_write)
2958{
2959    hwaddr len = *plen;
2960    hwaddr done = 0;
2961    hwaddr l, xlat, base;
2962    MemoryRegion *mr, *this_mr;
2963    ram_addr_t raddr;
2964    void *ptr;
2965
2966    if (len == 0) {
2967        return NULL;
2968    }
2969
2970    l = len;
2971    rcu_read_lock();
2972    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2973
2974    if (!memory_access_is_direct(mr, is_write)) {
2975        if (atomic_xchg(&bounce.in_use, true)) {
2976            rcu_read_unlock();
2977            return NULL;
2978        }
2979        /* Avoid unbounded allocations */
2980        l = MIN(l, TARGET_PAGE_SIZE);
2981        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2982        bounce.addr = addr;
2983        bounce.len = l;
2984
2985        memory_region_ref(mr);
2986        bounce.mr = mr;
2987        if (!is_write) {
2988            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2989                               bounce.buffer, l);
2990        }
2991
2992        rcu_read_unlock();
2993        *plen = l;
2994        return bounce.buffer;
2995    }
2996
2997    base = xlat;
2998    raddr = memory_region_get_ram_addr(mr);
2999
3000    for (;;) {
3001        len -= l;
3002        addr += l;
3003        done += l;
3004        if (len == 0) {
3005            break;
3006        }
3007
3008        l = len;
3009        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3010        if (this_mr != mr || xlat != base + done) {
3011            break;
3012        }
3013    }
3014
3015    memory_region_ref(mr);
3016    *plen = done;
3017    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3018    rcu_read_unlock();
3019
3020    return ptr;
3021}
3022
3023/* Unmaps a memory region previously mapped by address_space_map().
3024 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3025 * the amount of memory that was actually read or written by the caller.
3026 */
3027void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3028                         int is_write, hwaddr access_len)
3029{
3030    if (buffer != bounce.buffer) {
3031        MemoryRegion *mr;
3032        ram_addr_t addr1;
3033
3034        mr = qemu_ram_addr_from_host(buffer, &addr1);
3035        assert(mr != NULL);
3036        if (is_write) {
3037            invalidate_and_set_dirty(mr, addr1, access_len);
3038        }
3039        if (xen_enabled()) {
3040            xen_invalidate_map_cache_entry(buffer);
3041        }
3042        memory_region_unref(mr);
3043        return;
3044    }
3045    if (is_write) {
3046        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3047                            bounce.buffer, access_len);
3048    }
3049    qemu_vfree(bounce.buffer);
3050    bounce.buffer = NULL;
3051    memory_region_unref(bounce.mr);
3052    atomic_mb_set(&bounce.in_use, false);
3053    cpu_notify_map_clients();
3054}
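
    /*
     * Illustrative sketch (not part of this file): the zero-copy pattern
     * built on address_space_map()/address_space_unmap().  The mapping may
     * cover less than requested and may fall back to the bounce buffer, so
     * the caller honours the returned length and reports how much was
     * actually accessed when unmapping.
     *
     *     hwaddr len = size;
     *     void *p = address_space_map(as, gpa, &len, true);
     *     if (p) {
     *         // fill at most "len" bytes at p ...
     *         address_space_unmap(as, p, len, true, len);
     *     } else {
     *         // resources exhausted: register a map client and retry later
     *     }
     */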
3055
3056void *cpu_physical_memory_map(hwaddr addr,
3057                              hwaddr *plen,
3058                              int is_write)
3059{
3060    return address_space_map(&address_space_memory, addr, plen, is_write);
3061}
3062
3063void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3064                               int is_write, hwaddr access_len)
3065{
3066    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3067}
3068
3069/* warning: addr must be aligned */
3070static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3071                                                  MemTxAttrs attrs,
3072                                                  MemTxResult *result,
3073                                                  enum device_endian endian)
3074{
3075    uint8_t *ptr;
3076    uint64_t val;
3077    MemoryRegion *mr;
3078    hwaddr l = 4;
3079    hwaddr addr1;
3080    MemTxResult r;
3081    bool release_lock = false;
3082
3083    rcu_read_lock();
3084    mr = address_space_translate(as, addr, &addr1, &l, false);
3085    if (l < 4 || !memory_access_is_direct(mr, false)) {
3086        release_lock |= prepare_mmio_access(mr);
3087
3088        /* I/O case */
3089        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3090#if defined(TARGET_WORDS_BIGENDIAN)
3091        if (endian == DEVICE_LITTLE_ENDIAN) {
3092            val = bswap32(val);
3093        }
3094#else
3095        if (endian == DEVICE_BIG_ENDIAN) {
3096            val = bswap32(val);
3097        }
3098#endif
3099    } else {
3100        /* RAM case */
3101        ptr = qemu_get_ram_ptr(mr->ram_block,
3102                               (memory_region_get_ram_addr(mr)
3103                                & TARGET_PAGE_MASK)
3104                               + addr1);
3105        switch (endian) {
3106        case DEVICE_LITTLE_ENDIAN:
3107            val = ldl_le_p(ptr);
3108            break;
3109        case DEVICE_BIG_ENDIAN:
3110            val = ldl_be_p(ptr);
3111            break;
3112        default:
3113            val = ldl_p(ptr);
3114            break;
3115        }
3116        r = MEMTX_OK;
3117    }
3118    if (result) {
3119        *result = r;
3120    }
3121    if (release_lock) {
3122        qemu_mutex_unlock_iothread();
3123    }
3124    rcu_read_unlock();
3125    return val;
3126}
3127
3128uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3129                           MemTxAttrs attrs, MemTxResult *result)
3130{
3131    return address_space_ldl_internal(as, addr, attrs, result,
3132                                      DEVICE_NATIVE_ENDIAN);
3133}
3134
3135uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3136                              MemTxAttrs attrs, MemTxResult *result)
3137{
3138    return address_space_ldl_internal(as, addr, attrs, result,
3139                                      DEVICE_LITTLE_ENDIAN);
3140}
3141
3142uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3143                              MemTxAttrs attrs, MemTxResult *result)
3144{
3145    return address_space_ldl_internal(as, addr, attrs, result,
3146                                      DEVICE_BIG_ENDIAN);
3147}
3148
3149uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3150{
3151    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3152}
3153
3154uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3155{
3156    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3157}
3158
3159uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3160{
3161    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3162}
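
    /*
     * Illustrative sketch (not part of this file): the ld*_phys helpers above
     * (and the st*_phys counterparts later in this file) are for callers that
     * do not care about the transaction result, e.g. picking a 32-bit field
     * out of a guest-physical descriptor:
     *
     *     uint32_t flags = ldl_le_phys(&address_space_memory, desc_gpa);
     *
     *     // when the result matters, use the address_space_* variant
     *     MemTxResult r;
     *     uint32_t val = address_space_ldl(as, gpa, MEMTXATTRS_UNSPECIFIED,
     *                                      &r);
     */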
3163
3164/* warning: addr must be aligned */
3165static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3166                                                  MemTxAttrs attrs,
3167                                                  MemTxResult *result,
3168                                                  enum device_endian endian)
3169{
3170    uint8_t *ptr;
3171    uint64_t val;
3172    MemoryRegion *mr;
3173    hwaddr l = 8;
3174    hwaddr addr1;
3175    MemTxResult r;
3176    bool release_lock = false;
3177
3178    rcu_read_lock();
3179    mr = address_space_translate(as, addr, &addr1, &l,
3180                                 false);
3181    if (l < 8 || !memory_access_is_direct(mr, false)) {
3182        release_lock |= prepare_mmio_access(mr);
3183
3184        /* I/O case */
3185        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3186#if defined(TARGET_WORDS_BIGENDIAN)
3187        if (endian == DEVICE_LITTLE_ENDIAN) {
3188            val = bswap64(val);
3189        }
3190#else
3191        if (endian == DEVICE_BIG_ENDIAN) {
3192            val = bswap64(val);
3193        }
3194#endif
3195    } else {
3196        /* RAM case */
3197        ptr = qemu_get_ram_ptr(mr->ram_block,
3198                               (memory_region_get_ram_addr(mr)
3199                                & TARGET_PAGE_MASK)
3200                               + addr1);
3201        switch (endian) {
3202        case DEVICE_LITTLE_ENDIAN:
3203            val = ldq_le_p(ptr);
3204            break;
3205        case DEVICE_BIG_ENDIAN:
3206            val = ldq_be_p(ptr);
3207            break;
3208        default:
3209            val = ldq_p(ptr);
3210            break;
3211        }
3212        r = MEMTX_OK;
3213    }
3214    if (result) {
3215        *result = r;
3216    }
3217    if (release_lock) {
3218        qemu_mutex_unlock_iothread();
3219    }
3220    rcu_read_unlock();
3221    return val;
3222}
3223
3224uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3225                           MemTxAttrs attrs, MemTxResult *result)
3226{
3227    return address_space_ldq_internal(as, addr, attrs, result,
3228                                      DEVICE_NATIVE_ENDIAN);
3229}
3230
3231uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3232                           MemTxAttrs attrs, MemTxResult *result)
3233{
3234    return address_space_ldq_internal(as, addr, attrs, result,
3235                                      DEVICE_LITTLE_ENDIAN);
3236}
3237
3238uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3239                           MemTxAttrs attrs, MemTxResult *result)
3240{
3241    return address_space_ldq_internal(as, addr, attrs, result,
3242                                      DEVICE_BIG_ENDIAN);
3243}
3244
3245uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3246{
3247    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3248}
3249
3250uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3251{
3252    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3253}
3254
3255uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3256{
3257    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3258}
3259
3260/* XXX: optimize */
3261uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3262                            MemTxAttrs attrs, MemTxResult *result)
3263{
3264    uint8_t val;
3265    MemTxResult r;
3266
3267    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3268    if (result) {
3269        *result = r;
3270    }
3271    return val;
3272}
3273
3274uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3275{
3276    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3277}
3278
3279/* warning: addr must be aligned */
3280static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3281                                                   hwaddr addr,
3282                                                   MemTxAttrs attrs,
3283                                                   MemTxResult *result,
3284                                                   enum device_endian endian)
3285{
3286    uint8_t *ptr;
3287    uint64_t val;
3288    MemoryRegion *mr;
3289    hwaddr l = 2;
3290    hwaddr addr1;
3291    MemTxResult r;
3292    bool release_lock = false;
3293
3294    rcu_read_lock();
3295    mr = address_space_translate(as, addr, &addr1, &l,
3296                                 false);
3297    if (l < 2 || !memory_access_is_direct(mr, false)) {
3298        release_lock |= prepare_mmio_access(mr);
3299
3300        /* I/O case */
3301        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3302#if defined(TARGET_WORDS_BIGENDIAN)
3303        if (endian == DEVICE_LITTLE_ENDIAN) {
3304            val = bswap16(val);
3305        }
3306#else
3307        if (endian == DEVICE_BIG_ENDIAN) {
3308            val = bswap16(val);
3309        }
3310#endif
3311    } else {
3312        /* RAM case */
3313        ptr = qemu_get_ram_ptr(mr->ram_block,
3314                               (memory_region_get_ram_addr(mr)
3315                                & TARGET_PAGE_MASK)
3316                               + addr1);
3317        switch (endian) {
3318        case DEVICE_LITTLE_ENDIAN:
3319            val = lduw_le_p(ptr);
3320            break;
3321        case DEVICE_BIG_ENDIAN:
3322            val = lduw_be_p(ptr);
3323            break;
3324        default:
3325            val = lduw_p(ptr);
3326            break;
3327        }
3328        r = MEMTX_OK;
3329    }
3330    if (result) {
3331        *result = r;
3332    }
3333    if (release_lock) {
3334        qemu_mutex_unlock_iothread();
3335    }
3336    rcu_read_unlock();
3337    return val;
3338}
3339
3340uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3341                           MemTxAttrs attrs, MemTxResult *result)
3342{
3343    return address_space_lduw_internal(as, addr, attrs, result,
3344                                       DEVICE_NATIVE_ENDIAN);
3345}
3346
3347uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3348                           MemTxAttrs attrs, MemTxResult *result)
3349{
3350    return address_space_lduw_internal(as, addr, attrs, result,
3351                                       DEVICE_LITTLE_ENDIAN);
3352}
3353
3354uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3355                           MemTxAttrs attrs, MemTxResult *result)
3356{
3357    return address_space_lduw_internal(as, addr, attrs, result,
3358                                       DEVICE_BIG_ENDIAN);
3359}
3360
3361uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3362{
3363    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3364}
3365
3366uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3367{
3368    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3369}
3370
3371uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3372{
3373    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3374}
3375
3376/* warning: addr must be aligned. The ram page is not marked as dirty
3377   and the code inside is not invalidated. This is useful if the dirty
3378   bits are used to track modified PTEs */
3379void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3380                                MemTxAttrs attrs, MemTxResult *result)
3381{
3382    uint8_t *ptr;
3383    MemoryRegion *mr;
3384    hwaddr l = 4;
3385    hwaddr addr1;
3386    MemTxResult r;
3387    uint8_t dirty_log_mask;
3388    bool release_lock = false;
3389
3390    rcu_read_lock();
3391    mr = address_space_translate(as, addr, &addr1, &l,
3392                                 true);
3393    if (l < 4 || !memory_access_is_direct(mr, true)) {
3394        release_lock |= prepare_mmio_access(mr);
3395
3396        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3397    } else {
3398        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3399        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3400        stl_p(ptr, val);
3401
3402        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3403        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3404        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3405        r = MEMTX_OK;
3406    }
3407    if (result) {
3408        *result = r;
3409    }
3410    if (release_lock) {
3411        qemu_mutex_unlock_iothread();
3412    }
3413    rcu_read_unlock();
3414}
3415
3416void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3417{
3418    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3419}
3420
3421/* warning: addr must be aligned */
3422static inline void address_space_stl_internal(AddressSpace *as,
3423                                              hwaddr addr, uint32_t val,
3424                                              MemTxAttrs attrs,
3425                                              MemTxResult *result,
3426                                              enum device_endian endian)
3427{
3428    uint8_t *ptr;
3429    MemoryRegion *mr;
3430    hwaddr l = 4;
3431    hwaddr addr1;
3432    MemTxResult r;
3433    bool release_lock = false;
3434
3435    rcu_read_lock();
3436    mr = address_space_translate(as, addr, &addr1, &l,
3437                                 true);
3438    if (l < 4 || !memory_access_is_direct(mr, true)) {
3439        release_lock |= prepare_mmio_access(mr);
3440
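        /* The MMIO dispatch below expects the value in the target's native
         * byte order, so only swap when the caller asked for the opposite
         * endianness of the one this target was built with. */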
3441#if defined(TARGET_WORDS_BIGENDIAN)
3442        if (endian == DEVICE_LITTLE_ENDIAN) {
3443            val = bswap32(val);
3444        }
3445#else
3446        if (endian == DEVICE_BIG_ENDIAN) {
3447            val = bswap32(val);
3448        }
3449#endif
3450        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3451    } else {
3452        /* RAM case */
3453        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3454        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3455        switch (endian) {
3456        case DEVICE_LITTLE_ENDIAN:
3457            stl_le_p(ptr, val);
3458            break;
3459        case DEVICE_BIG_ENDIAN:
3460            stl_be_p(ptr, val);
3461            break;
3462        default:
3463            stl_p(ptr, val);
3464            break;
3465        }
3466        invalidate_and_set_dirty(mr, addr1, 4);
3467        r = MEMTX_OK;
3468    }
3469    if (result) {
3470        *result = r;
3471    }
3472    if (release_lock) {
3473        qemu_mutex_unlock_iothread();
3474    }
3475    rcu_read_unlock();
3476}
3477
3478void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3479                       MemTxAttrs attrs, MemTxResult *result)
3480{
3481    address_space_stl_internal(as, addr, val, attrs, result,
3482                               DEVICE_NATIVE_ENDIAN);
3483}
3484
3485void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3486                       MemTxAttrs attrs, MemTxResult *result)
3487{
3488    address_space_stl_internal(as, addr, val, attrs, result,
3489                               DEVICE_LITTLE_ENDIAN);
3490}
3491
3492void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3493                       MemTxAttrs attrs, MemTxResult *result)
3494{
3495    address_space_stl_internal(as, addr, val, attrs, result,
3496                               DEVICE_BIG_ENDIAN);
3497}
3498
3499void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3500{
3501    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3502}
3503
3504void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3505{
3506    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3507}
3508
3509void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3510{
3511    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3512}
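/*
 * Illustrative use, not from the original file: callers that do not need a
 * MemTxResult use the *_phys wrappers above, e.g. storing a little-endian
 * 32-bit word into guest memory (status_paddr and status are hypothetical):
 *
 *     stl_le_phys(&address_space_memory, status_paddr, status);
 */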
3513
3514/* XXX: optimize */
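/* A single byte needs no endianness handling, so the store simply goes
 * through the generic address_space_rw() path. */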
3515void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3516                       MemTxAttrs attrs, MemTxResult *result)
3517{
3518    uint8_t v = val;
3519    MemTxResult r;
3520
3521    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3522    if (result) {
3523        *result = r;
3524    }
3525}
3526
3527void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3528{
3529    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3530}
3531
3532/* warning: addr must be aligned */
3533static inline void address_space_stw_internal(AddressSpace *as,
3534                                              hwaddr addr, uint32_t val,
3535                                              MemTxAttrs attrs,
3536                                              MemTxResult *result,
3537                                              enum device_endian endian)
3538{
3539    uint8_t *ptr;
3540    MemoryRegion *mr;
3541    hwaddr l = 2;
3542    hwaddr addr1;
3543    MemTxResult r;
3544    bool release_lock = false;
3545
3546    rcu_read_lock();
3547    mr = address_space_translate(as, addr, &addr1, &l, true);
3548    if (l < 2 || !memory_access_is_direct(mr, true)) {
3549        release_lock |= prepare_mmio_access(mr);
3550
3551#if defined(TARGET_WORDS_BIGENDIAN)
3552        if (endian == DEVICE_LITTLE_ENDIAN) {
3553            val = bswap16(val);
3554        }
3555#else
3556        if (endian == DEVICE_BIG_ENDIAN) {
3557            val = bswap16(val);
3558        }
3559#endif
3560        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3561    } else {
3562        /* RAM case */
3563        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3564        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3565        switch (endian) {
3566        case DEVICE_LITTLE_ENDIAN:
3567            stw_le_p(ptr, val);
3568            break;
3569        case DEVICE_BIG_ENDIAN:
3570            stw_be_p(ptr, val);
3571            break;
3572        default:
3573            stw_p(ptr, val);
3574            break;
3575        }
3576        invalidate_and_set_dirty(mr, addr1, 2);
3577        r = MEMTX_OK;
3578    }
3579    if (result) {
3580        *result = r;
3581    }
3582    if (release_lock) {
3583        qemu_mutex_unlock_iothread();
3584    }
3585    rcu_read_unlock();
3586}
3587
3588void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3589                       MemTxAttrs attrs, MemTxResult *result)
3590{
3591    address_space_stw_internal(as, addr, val, attrs, result,
3592                               DEVICE_NATIVE_ENDIAN);
3593}
3594
3595void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3596                       MemTxAttrs attrs, MemTxResult *result)
3597{
3598    address_space_stw_internal(as, addr, val, attrs, result,
3599                               DEVICE_LITTLE_ENDIAN);
3600}
3601
3602void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3603                       MemTxAttrs attrs, MemTxResult *result)
3604{
3605    address_space_stw_internal(as, addr, val, attrs, result,
3606                               DEVICE_BIG_ENDIAN);
3607}
3608
3609void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3610{
3611    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3612}
3613
3614void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3615{
3616    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3617}
3618
3619void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3620{
3621    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3622}
3623
3624/* XXX: optimize */
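/* The 64-bit stores below are likewise unoptimized: the value is converted
 * to the requested byte order in host memory (tswap64 / cpu_to_le64 /
 * cpu_to_be64) and then written through the generic address_space_rw()
 * byte path. */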
3625void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3626                       MemTxAttrs attrs, MemTxResult *result)
3627{
3628    MemTxResult r;
3629    val = tswap64(val);
3630    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3631    if (result) {
3632        *result = r;
3633    }
3634}
3635
3636void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3637                       MemTxAttrs attrs, MemTxResult *result)
3638{
3639    MemTxResult r;
3640    val = cpu_to_le64(val);
3641    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3642    if (result) {
3643        *result = r;
3644    }
3645}
3646void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3647                       MemTxAttrs attrs, MemTxResult *result)
3648{
3649    MemTxResult r;
3650    val = cpu_to_be64(val);
3651    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3652    if (result) {
3653        *result = r;
3654    }
3655}
3656
3657void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3658{
3659    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3660}
3661
3662void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3663{
3664    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3665}
3666
3667void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3668{
3669    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3670}
3671
3672/* virtual memory access for debug (includes writing to ROM) */
3673int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3674                        uint8_t *buf, int len, int is_write)
3675{
3676    int l;
3677    hwaddr phys_addr;
3678    target_ulong page;
3679
3680    while (len > 0) {
3681        int asidx;
3682        MemTxAttrs attrs;
3683
3684        page = addr & TARGET_PAGE_MASK;
3685        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3686        asidx = cpu_asidx_from_attrs(cpu, attrs);
3687        /* if no physical page mapped, return an error */
3688        if (phys_addr == -1)
3689            return -1;
3690        l = (page + TARGET_PAGE_SIZE) - addr;
3691        if (l > len)
3692            l = len;
3693        phys_addr += (addr & ~TARGET_PAGE_MASK);
3694        if (is_write) {
3695            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3696                                          phys_addr, buf, l);
3697        } else {
3698            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3699                             MEMTXATTRS_UNSPECIFIED,
3700                             buf, l, 0);
3701        }
3702        len -= l;
3703        buf += l;
3704        addr += l;
3705    }
3706    return 0;
3707}
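/*
 * Illustrative sketch, not part of the original file: a gdbstub-style caller
 * reads guest-virtual memory with cpu_memory_rw_debug() and treats a
 * negative return value as an unmapped address.  debug_read_u32 is a
 * hypothetical helper.
 */
#if 0
static bool debug_read_u32(CPUState *cpu, target_ulong vaddr, uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;               /* no physical page mapped */
    }
    memcpy(out, buf, sizeof(buf));  /* raw guest bytes; caller handles endianness */
    return true;
}
#endif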
3708
3709/*
3710 * Allows code that needs to deal with migration bitmaps etc to still be built
3711 * target independent.
3712 */
3713size_t qemu_target_page_bits(void)
3714{
3715    return TARGET_PAGE_BITS;
3716}
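/*
 * Illustrative use, not from the original file: target-independent code such
 * as the migration bitmap handling can derive the page size without pulling
 * in target headers:
 *
 *     size_t page_size = (size_t)1 << qemu_target_page_bits();
 */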
3717
3718#endif
3719
3720/*
3721 * A helper function for the _utterly broken_ virtio device model to find out if
3722 * it's running on a big endian machine. Don't do this at home kids!
3723 */
3724bool target_words_bigendian(void);
3725bool target_words_bigendian(void)
3726{
3727#if defined(TARGET_WORDS_BIGENDIAN)
3728    return true;
3729#else
3730    return false;
3731#endif
3732}
3733
3734#ifndef CONFIG_USER_ONLY
3735bool cpu_physical_memory_is_io(hwaddr phys_addr)
3736{
3737    MemoryRegion *mr;
3738    hwaddr l = 1;
3739    bool res;
3740
3741    rcu_read_lock();
3742    mr = address_space_translate(&address_space_memory,
3743                                 phys_addr, &phys_addr, &l, false);
3744
3745    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3746    rcu_read_unlock();
3747    return res;
3748}
3749
3750int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3751{
3752    RAMBlock *block;
3753    int ret = 0;
3754
3755    rcu_read_lock();
3756    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3757        ret = func(block->idstr, block->host, block->offset,
3758                   block->used_length, opaque);
3759        if (ret) {
3760            break;
3761        }
3762    }
3763    rcu_read_unlock();
3764    return ret;
3765}
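/*
 * Illustrative sketch, not part of the original file: a RAMBlockIterFunc
 * callback matching the arguments passed above (id string, host pointer,
 * offset, used length, opaque pointer).  total_ram_example is a
 * hypothetical name.
 */
#if 0
static int total_ram_example(const char *block_name, void *host_addr,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0;   /* returning non-zero stops the iteration early */
}

/* usage:
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(total_ram_example, &total);
 */
#endif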
3766#endif
3767