qemu/exec.c
<<
>>
Prefs
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "qapi/error.h"
  21#ifndef _WIN32
  22#include <sys/mman.h>
  23#endif
  24
  25#include "qemu/cutils.h"
  26#include "cpu.h"
  27#include "tcg.h"
  28#include "hw/hw.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#endif
  32#include "hw/qdev.h"
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "hw/xen/xen.h"
  36#include "qemu/timer.h"
  37#include "qemu/config-file.h"
  38#include "qemu/error-report.h"
  39#include "exec/memory.h"
  40#include "sysemu/dma.h"
  41#include "exec/address-spaces.h"
  42#if defined(CONFIG_USER_ONLY)
  43#include <qemu.h>
  44#else /* !CONFIG_USER_ONLY */
  45#include "sysemu/xen-mapcache.h"
  46#include "trace.h"
  47#endif
  48#include "exec/cpu-all.h"
  49#include "qemu/rcu_queue.h"
  50#include "qemu/main-loop.h"
  51#include "translate-all.h"
  52#include "sysemu/replay.h"
  53
  54#include "exec/memory-internal.h"
  55#include "exec/ram_addr.h"
  56#include "exec/log.h"
  57
  58#include "qemu/range.h"
  59#ifndef _WIN32
  60#include "qemu/mmap-alloc.h"
  61#endif
  62
  63//#define DEBUG_SUBPAGE
  64
  65#if !defined(CONFIG_USER_ONLY)
  66/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  67 * are protected by the ramlist lock.
  68 */
  69RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  70
  71static MemoryRegion *system_memory;
  72static MemoryRegion *system_io;
  73
  74AddressSpace address_space_io;
  75AddressSpace address_space_memory;
  76
  77MemoryRegion io_mem_rom, io_mem_notdirty;
  78static MemoryRegion io_mem_unassigned;
  79
  80/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  81#define RAM_PREALLOC   (1 << 0)
  82
  83/* RAM is mmap-ed with MAP_SHARED */
  84#define RAM_SHARED     (1 << 1)
  85
  86/* Only a portion of RAM (used_length) is actually used, and migrated.
  87 * This used_length size can change across reboots.
  88 */
  89#define RAM_RESIZEABLE (1 << 2)
  90
  91#endif
  92
  93struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  94/* current CPU in the current thread. It is only valid inside
  95   cpu_exec() */
  96__thread CPUState *current_cpu;
  97/* 0 = Do not count executed instructions.
  98   1 = Precise instruction counting.
  99   2 = Adaptive rate instruction counting.  */
 100int use_icount;
 101
 102#if !defined(CONFIG_USER_ONLY)
 103
 104typedef struct PhysPageEntry PhysPageEntry;
 105
 106struct PhysPageEntry {
 107    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
 108    uint32_t skip : 6;
 109     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 110    uint32_t ptr : 26;
 111};
 112
 113#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 114
 115/* Size of the L2 (and L3, etc) page tables.  */
 116#define ADDR_SPACE_BITS 64
 117
 118#define P_L2_BITS 9
 119#define P_L2_SIZE (1 << P_L2_BITS)
 120
 121#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 122
 123typedef PhysPageEntry Node[P_L2_SIZE];
 124
 125typedef struct PhysPageMap {
 126    struct rcu_head rcu;
 127
 128    unsigned sections_nb;
 129    unsigned sections_nb_alloc;
 130    unsigned nodes_nb;
 131    unsigned nodes_nb_alloc;
 132    Node *nodes;
 133    MemoryRegionSection *sections;
 134} PhysPageMap;
 135
 136struct AddressSpaceDispatch {
 137    struct rcu_head rcu;
 138
 139    MemoryRegionSection *mru_section;
 140    /* This is a multi-level map on the physical address space.
 141     * The bottom level has pointers to MemoryRegionSections.
 142     */
 143    PhysPageEntry phys_map;
 144    PhysPageMap map;
 145    AddressSpace *as;
 146};
 147
 148#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 149typedef struct subpage_t {
 150    MemoryRegion iomem;
 151    AddressSpace *as;
 152    hwaddr base;
 153    uint16_t sub_section[TARGET_PAGE_SIZE];
 154} subpage_t;
 155
 156#define PHYS_SECTION_UNASSIGNED 0
 157#define PHYS_SECTION_NOTDIRTY 1
 158#define PHYS_SECTION_ROM 2
 159#define PHYS_SECTION_WATCH 3
 160
 161static void io_mem_init(void);
 162static void memory_map_init(void);
 163static void tcg_commit(MemoryListener *listener);
 164
 165static MemoryRegion io_mem_watch;
 166
 167/**
 168 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 169 * @cpu: the CPU whose AddressSpace this is
 170 * @as: the AddressSpace itself
 171 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 172 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 173 */
 174struct CPUAddressSpace {
 175    CPUState *cpu;
 176    AddressSpace *as;
 177    struct AddressSpaceDispatch *memory_dispatch;
 178    MemoryListener tcg_as_listener;
 179};
 180
 181#endif
 182
 183#if !defined(CONFIG_USER_ONLY)
 184
 185static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 186{
 187    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 188        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 189        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 190        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 191    }
 192}
 193
 194static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 195{
 196    unsigned i;
 197    uint32_t ret;
 198    PhysPageEntry e;
 199    PhysPageEntry *p;
 200
 201    ret = map->nodes_nb++;
 202    p = map->nodes[ret];
 203    assert(ret != PHYS_MAP_NODE_NIL);
 204    assert(ret != map->nodes_nb_alloc);
 205
 206    e.skip = leaf ? 0 : 1;
 207    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 208    for (i = 0; i < P_L2_SIZE; ++i) {
 209        memcpy(&p[i], &e, sizeof(e));
 210    }
 211    return ret;
 212}
 213
 214static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 215                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 216                                int level)
 217{
 218    PhysPageEntry *p;
 219    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 220
 221    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 222        lp->ptr = phys_map_node_alloc(map, level == 0);
 223    }
 224    p = map->nodes[lp->ptr];
 225    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 226
 227    while (*nb && lp < &p[P_L2_SIZE]) {
 228        if ((*index & (step - 1)) == 0 && *nb >= step) {
 229            lp->skip = 0;
 230            lp->ptr = leaf;
 231            *index += step;
 232            *nb -= step;
 233        } else {
 234            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 235        }
 236        ++lp;
 237    }
 238}
 239
 240static void phys_page_set(AddressSpaceDispatch *d,
 241                          hwaddr index, hwaddr nb,
 242                          uint16_t leaf)
 243{
 244    /* Wildly overreserve - it doesn't matter much. */
 245    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 246
 247    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 248}
 249
 250/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 251 * and update our entry so we can skip it and go directly to the destination.
 252 */
 253static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 254{
 255    unsigned valid_ptr = P_L2_SIZE;
 256    int valid = 0;
 257    PhysPageEntry *p;
 258    int i;
 259
 260    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 261        return;
 262    }
 263
 264    p = nodes[lp->ptr];
 265    for (i = 0; i < P_L2_SIZE; i++) {
 266        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 267            continue;
 268        }
 269
 270        valid_ptr = i;
 271        valid++;
 272        if (p[i].skip) {
 273            phys_page_compact(&p[i], nodes, compacted);
 274        }
 275    }
 276
 277    /* We can only compress if there's only one child. */
 278    if (valid != 1) {
 279        return;
 280    }
 281
 282    assert(valid_ptr < P_L2_SIZE);
 283
 284    /* Don't compress if it won't fit in the # of bits we have. */
 285    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 286        return;
 287    }
 288
 289    lp->ptr = p[valid_ptr].ptr;
 290    if (!p[valid_ptr].skip) {
 291        /* If our only child is a leaf, make this a leaf. */
 292        /* By design, we should have made this node a leaf to begin with so we
 293         * should never reach here.
 294         * But since it's so simple to handle this, let's do it just in case we
 295         * change this rule.
 296         */
 297        lp->skip = 0;
 298    } else {
 299        lp->skip += p[valid_ptr].skip;
 300    }
 301}
 302
 303static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 304{
 305    DECLARE_BITMAP(compacted, nodes_nb);
 306
 307    if (d->phys_map.skip) {
 308        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 309    }
 310}
 311
 312static inline bool section_covers_addr(const MemoryRegionSection *section,
 313                                       hwaddr addr)
 314{
 315    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 316     * the section must cover the entire address space.
 317     */
 318    return section->size.hi ||
 319           range_covers_byte(section->offset_within_address_space,
 320                             section->size.lo, addr);
 321}
 322
 323static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 324                                           Node *nodes, MemoryRegionSection *sections)
 325{
 326    PhysPageEntry *p;
 327    hwaddr index = addr >> TARGET_PAGE_BITS;
 328    int i;
 329
 330    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 331        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 332            return &sections[PHYS_SECTION_UNASSIGNED];
 333        }
 334        p = nodes[lp.ptr];
 335        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 336    }
 337
 338    if (section_covers_addr(&sections[lp.ptr], addr)) {
 339        return &sections[lp.ptr];
 340    } else {
 341        return &sections[PHYS_SECTION_UNASSIGNED];
 342    }
 343}
 344
 345bool memory_region_is_unassigned(MemoryRegion *mr)
 346{
 347    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 348        && mr != &io_mem_watch;
 349}
 350
 351/* Called from RCU critical section */
 352static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 353                                                        hwaddr addr,
 354                                                        bool resolve_subpage)
 355{
 356    MemoryRegionSection *section = atomic_read(&d->mru_section);
 357    subpage_t *subpage;
 358    bool update;
 359
 360    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 361        section_covers_addr(section, addr)) {
 362        update = false;
 363    } else {
 364        section = phys_page_find(d->phys_map, addr, d->map.nodes,
 365                                 d->map.sections);
 366        update = true;
 367    }
 368    if (resolve_subpage && section->mr->subpage) {
 369        subpage = container_of(section->mr, subpage_t, iomem);
 370        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 371    }
 372    if (update) {
 373        atomic_set(&d->mru_section, section);
 374    }
 375    return section;
 376}
 377
 378/* Called from RCU critical section */
 379static MemoryRegionSection *
 380address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 381                                 hwaddr *plen, bool resolve_subpage)
 382{
 383    MemoryRegionSection *section;
 384    MemoryRegion *mr;
 385    Int128 diff;
 386
 387    section = address_space_lookup_region(d, addr, resolve_subpage);
 388    /* Compute offset within MemoryRegionSection */
 389    addr -= section->offset_within_address_space;
 390
 391    /* Compute offset within MemoryRegion */
 392    *xlat = addr + section->offset_within_region;
 393
 394    mr = section->mr;
 395
 396    /* MMIO registers can be expected to perform full-width accesses based only
 397     * on their address, without considering adjacent registers that could
 398     * decode to completely different MemoryRegions.  When such registers
 399     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 400     * regions overlap wildly.  For this reason we cannot clamp the accesses
 401     * here.
 402     *
 403     * If the length is small (as is the case for address_space_ldl/stl),
 404     * everything works fine.  If the incoming length is large, however,
 405     * the caller really has to do the clamping through memory_access_size.
 406     */
 407    if (memory_region_is_ram(mr)) {
 408        diff = int128_sub(section->size, int128_make64(addr));
 409        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 410    }
 411    return section;
 412}
 413
 414/* Called from RCU critical section */
 415MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 416                                      hwaddr *xlat, hwaddr *plen,
 417                                      bool is_write)
 418{
 419    IOMMUTLBEntry iotlb;
 420    MemoryRegionSection *section;
 421    MemoryRegion *mr;
 422
 423    for (;;) {
 424        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 425        section = address_space_translate_internal(d, addr, &addr, plen, true);
 426        mr = section->mr;
 427
 428        if (!mr->iommu_ops) {
 429            break;
 430        }
 431
 432        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 433        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 434                | (addr & iotlb.addr_mask));
 435        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 436        if (!(iotlb.perm & (1 << is_write))) {
 437            mr = &io_mem_unassigned;
 438            break;
 439        }
 440
 441        as = iotlb.target_as;
 442    }
 443
 444    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 445        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 446        *plen = MIN(page, *plen);
 447    }
 448
 449    *xlat = addr;
 450    return mr;
 451}
 452
 453/* Called from RCU critical section */
 454MemoryRegionSection *
 455address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 456                                  hwaddr *xlat, hwaddr *plen)
 457{
 458    MemoryRegionSection *section;
 459    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
 460
 461    section = address_space_translate_internal(d, addr, xlat, plen, false);
 462
 463    assert(!section->mr->iommu_ops);
 464    return section;
 465}
 466#endif
 467
 468#if !defined(CONFIG_USER_ONLY)
 469
 470static int cpu_common_post_load(void *opaque, int version_id)
 471{
 472    CPUState *cpu = opaque;
 473
 474    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 475       version_id is increased. */
 476    cpu->interrupt_request &= ~0x01;
 477    tlb_flush(cpu, 1);
 478
 479    return 0;
 480}
 481
 482static int cpu_common_pre_load(void *opaque)
 483{
 484    CPUState *cpu = opaque;
 485
 486    cpu->exception_index = -1;
 487
 488    return 0;
 489}
 490
 491static bool cpu_common_exception_index_needed(void *opaque)
 492{
 493    CPUState *cpu = opaque;
 494
 495    return tcg_enabled() && cpu->exception_index != -1;
 496}
 497
 498static const VMStateDescription vmstate_cpu_common_exception_index = {
 499    .name = "cpu_common/exception_index",
 500    .version_id = 1,
 501    .minimum_version_id = 1,
 502    .needed = cpu_common_exception_index_needed,
 503    .fields = (VMStateField[]) {
 504        VMSTATE_INT32(exception_index, CPUState),
 505        VMSTATE_END_OF_LIST()
 506    }
 507};
 508
 509static bool cpu_common_crash_occurred_needed(void *opaque)
 510{
 511    CPUState *cpu = opaque;
 512
 513    return cpu->crash_occurred;
 514}
 515
 516static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 517    .name = "cpu_common/crash_occurred",
 518    .version_id = 1,
 519    .minimum_version_id = 1,
 520    .needed = cpu_common_crash_occurred_needed,
 521    .fields = (VMStateField[]) {
 522        VMSTATE_BOOL(crash_occurred, CPUState),
 523        VMSTATE_END_OF_LIST()
 524    }
 525};
 526
 527const VMStateDescription vmstate_cpu_common = {
 528    .name = "cpu_common",
 529    .version_id = 1,
 530    .minimum_version_id = 1,
 531    .pre_load = cpu_common_pre_load,
 532    .post_load = cpu_common_post_load,
 533    .fields = (VMStateField[]) {
 534        VMSTATE_UINT32(halted, CPUState),
 535        VMSTATE_UINT32(interrupt_request, CPUState),
 536        VMSTATE_END_OF_LIST()
 537    },
 538    .subsections = (const VMStateDescription*[]) {
 539        &vmstate_cpu_common_exception_index,
 540        &vmstate_cpu_common_crash_occurred,
 541        NULL
 542    }
 543};
 544
 545#endif
 546
 547CPUState *qemu_get_cpu(int index)
 548{
 549    CPUState *cpu;
 550
 551    CPU_FOREACH(cpu) {
 552        if (cpu->cpu_index == index) {
 553            return cpu;
 554        }
 555    }
 556
 557    return NULL;
 558}
 559
 560#if !defined(CONFIG_USER_ONLY)
 561void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 562{
 563    CPUAddressSpace *newas;
 564
 565    /* Target code should have set num_ases before calling us */
 566    assert(asidx < cpu->num_ases);
 567
 568    if (asidx == 0) {
 569        /* address space 0 gets the convenience alias */
 570        cpu->as = as;
 571    }
 572
 573    /* KVM cannot currently support multiple address spaces. */
 574    assert(asidx == 0 || !kvm_enabled());
 575
 576    if (!cpu->cpu_ases) {
 577        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 578    }
 579
 580    newas = &cpu->cpu_ases[asidx];
 581    newas->cpu = cpu;
 582    newas->as = as;
 583    if (tcg_enabled()) {
 584        newas->tcg_as_listener.commit = tcg_commit;
 585        memory_listener_register(&newas->tcg_as_listener, as);
 586    }
 587}
 588
 589AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 590{
 591    /* Return the AddressSpace corresponding to the specified index */
 592    return cpu->cpu_ases[asidx].as;
 593}
 594#endif
 595
 596#ifndef CONFIG_USER_ONLY
 597static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 598
 599static int cpu_get_free_index(Error **errp)
 600{
 601    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 602
 603    if (cpu >= MAX_CPUMASK_BITS) {
 604        error_setg(errp, "Trying to use more CPUs than max of %d",
 605                   MAX_CPUMASK_BITS);
 606        return -1;
 607    }
 608
 609    bitmap_set(cpu_index_map, cpu, 1);
 610    return cpu;
 611}
 612
 613void cpu_exec_exit(CPUState *cpu)
 614{
 615    if (cpu->cpu_index == -1) {
 616        /* cpu_index was never allocated by this @cpu or was already freed. */
 617        return;
 618    }
 619
 620    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 621    cpu->cpu_index = -1;
 622}
 623#else
 624
 625static int cpu_get_free_index(Error **errp)
 626{
 627    CPUState *some_cpu;
 628    int cpu_index = 0;
 629
 630    CPU_FOREACH(some_cpu) {
 631        cpu_index++;
 632    }
 633    return cpu_index;
 634}
 635
 636void cpu_exec_exit(CPUState *cpu)
 637{
 638}
 639#endif
 640
 641void cpu_exec_init(CPUState *cpu, Error **errp)
 642{
 643    CPUClass *cc = CPU_GET_CLASS(cpu);
 644    int cpu_index;
 645    Error *local_err = NULL;
 646
 647    cpu->as = NULL;
 648    cpu->num_ases = 0;
 649
 650#ifndef CONFIG_USER_ONLY
 651    cpu->thread_id = qemu_get_thread_id();
 652
 653    /* This is a softmmu CPU object, so create a property for it
 654     * so users can wire up its memory. (This can't go in qom/cpu.c
 655     * because that file is compiled only once for both user-mode
 656     * and system builds.) The default if no link is set up is to use
 657     * the system address space.
 658     */
 659    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 660                             (Object **)&cpu->memory,
 661                             qdev_prop_allow_set_link_before_realize,
 662                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
 663                             &error_abort);
 664    cpu->memory = system_memory;
 665    object_ref(OBJECT(cpu->memory));
 666#endif
 667
 668#if defined(CONFIG_USER_ONLY)
 669    cpu_list_lock();
 670#endif
 671    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
 672    if (local_err) {
 673        error_propagate(errp, local_err);
 674#if defined(CONFIG_USER_ONLY)
 675        cpu_list_unlock();
 676#endif
 677        return;
 678    }
 679    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 680#if defined(CONFIG_USER_ONLY)
 681    cpu_list_unlock();
 682#endif
 683    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 684        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 685    }
 686    if (cc->vmsd != NULL) {
 687        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 688    }
 689}
 690
 691#if defined(CONFIG_USER_ONLY)
 692static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 693{
 694    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 695}
 696#else
 697static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 698{
 699    MemTxAttrs attrs;
 700    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
 701    int asidx = cpu_asidx_from_attrs(cpu, attrs);
 702    if (phys != -1) {
 703        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
 704                                phys | (pc & ~TARGET_PAGE_MASK));
 705    }
 706}
 707#endif
 708
 709#if defined(CONFIG_USER_ONLY)
 710void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 711
 712{
 713}
 714
 715int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 716                          int flags)
 717{
 718    return -ENOSYS;
 719}
 720
 721void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 722{
 723}
 724
 725int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 726                          int flags, CPUWatchpoint **watchpoint)
 727{
 728    return -ENOSYS;
 729}
 730#else
 731/* Add a watchpoint.  */
 732int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 733                          int flags, CPUWatchpoint **watchpoint)
 734{
 735    CPUWatchpoint *wp;
 736
 737    /* forbid ranges which are empty or run off the end of the address space */
 738    if (len == 0 || (addr + len - 1) < addr) {
 739        error_report("tried to set invalid watchpoint at %"
 740                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 741        return -EINVAL;
 742    }
 743    wp = g_malloc(sizeof(*wp));
 744
 745    wp->vaddr = addr;
 746    wp->len = len;
 747    wp->flags = flags;
 748
 749    /* keep all GDB-injected watchpoints in front */
 750    if (flags & BP_GDB) {
 751        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 752    } else {
 753        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 754    }
 755
 756    tlb_flush_page(cpu, addr);
 757
 758    if (watchpoint)
 759        *watchpoint = wp;
 760    return 0;
 761}
 762
 763/* Remove a specific watchpoint.  */
 764int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 765                          int flags)
 766{
 767    CPUWatchpoint *wp;
 768
 769    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 770        if (addr == wp->vaddr && len == wp->len
 771                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 772            cpu_watchpoint_remove_by_ref(cpu, wp);
 773            return 0;
 774        }
 775    }
 776    return -ENOENT;
 777}
 778
 779/* Remove a specific watchpoint by reference.  */
 780void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 781{
 782    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 783
 784    tlb_flush_page(cpu, watchpoint->vaddr);
 785
 786    g_free(watchpoint);
 787}
 788
 789/* Remove all matching watchpoints.  */
 790void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 791{
 792    CPUWatchpoint *wp, *next;
 793
 794    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 795        if (wp->flags & mask) {
 796            cpu_watchpoint_remove_by_ref(cpu, wp);
 797        }
 798    }
 799}
 800
 801/* Return true if this watchpoint address matches the specified
 802 * access (ie the address range covered by the watchpoint overlaps
 803 * partially or completely with the address range covered by the
 804 * access).
 805 */
 806static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 807                                                  vaddr addr,
 808                                                  vaddr len)
 809{
 810    /* We know the lengths are non-zero, but a little caution is
 811     * required to avoid errors in the case where the range ends
 812     * exactly at the top of the address space and so addr + len
 813     * wraps round to zero.
 814     */
 815    vaddr wpend = wp->vaddr + wp->len - 1;
 816    vaddr addrend = addr + len - 1;
 817
 818    return !(addr > wpend || wp->vaddr > addrend);
 819}
 820
 821#endif
 822
 823/* Add a breakpoint.  */
 824int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 825                          CPUBreakpoint **breakpoint)
 826{
 827    CPUBreakpoint *bp;
 828
 829    bp = g_malloc(sizeof(*bp));
 830
 831    bp->pc = pc;
 832    bp->flags = flags;
 833
 834    /* keep all GDB-injected breakpoints in front */
 835    if (flags & BP_GDB) {
 836        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 837    } else {
 838        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 839    }
 840
 841    breakpoint_invalidate(cpu, pc);
 842
 843    if (breakpoint) {
 844        *breakpoint = bp;
 845    }
 846    return 0;
 847}
 848
 849/* Remove a specific breakpoint.  */
 850int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 851{
 852    CPUBreakpoint *bp;
 853
 854    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 855        if (bp->pc == pc && bp->flags == flags) {
 856            cpu_breakpoint_remove_by_ref(cpu, bp);
 857            return 0;
 858        }
 859    }
 860    return -ENOENT;
 861}
 862
 863/* Remove a specific breakpoint by reference.  */
 864void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 865{
 866    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 867
 868    breakpoint_invalidate(cpu, breakpoint->pc);
 869
 870    g_free(breakpoint);
 871}
 872
 873/* Remove all matching breakpoints. */
 874void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 875{
 876    CPUBreakpoint *bp, *next;
 877
 878    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 879        if (bp->flags & mask) {
 880            cpu_breakpoint_remove_by_ref(cpu, bp);
 881        }
 882    }
 883}
 884
 885/* enable or disable single step mode. EXCP_DEBUG is returned by the
 886   CPU loop after each instruction */
 887void cpu_single_step(CPUState *cpu, int enabled)
 888{
 889    if (cpu->singlestep_enabled != enabled) {
 890        cpu->singlestep_enabled = enabled;
 891        if (kvm_enabled()) {
 892            kvm_update_guest_debug(cpu, 0);
 893        } else {
 894            /* must flush all the translated code to avoid inconsistencies */
 895            /* XXX: only flush what is necessary */
 896            tb_flush(cpu);
 897        }
 898    }
 899}
 900
 901void cpu_abort(CPUState *cpu, const char *fmt, ...)
 902{
 903    va_list ap;
 904    va_list ap2;
 905
 906    va_start(ap, fmt);
 907    va_copy(ap2, ap);
 908    fprintf(stderr, "qemu: fatal: ");
 909    vfprintf(stderr, fmt, ap);
 910    fprintf(stderr, "\n");
 911    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 912    if (qemu_log_separate()) {
 913        qemu_log("qemu: fatal: ");
 914        qemu_log_vprintf(fmt, ap2);
 915        qemu_log("\n");
 916        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 917        qemu_log_flush();
 918        qemu_log_close();
 919    }
 920    va_end(ap2);
 921    va_end(ap);
 922    replay_finish();
 923#if defined(CONFIG_USER_ONLY)
 924    {
 925        struct sigaction act;
 926        sigfillset(&act.sa_mask);
 927        act.sa_handler = SIG_DFL;
 928        sigaction(SIGABRT, &act, NULL);
 929    }
 930#endif
 931    abort();
 932}
 933
 934#if !defined(CONFIG_USER_ONLY)
 935/* Called from RCU critical section */
 936static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 937{
 938    RAMBlock *block;
 939
 940    block = atomic_rcu_read(&ram_list.mru_block);
 941    if (block && addr - block->offset < block->max_length) {
 942        return block;
 943    }
 944    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 945        if (addr - block->offset < block->max_length) {
 946            goto found;
 947        }
 948    }
 949
 950    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 951    abort();
 952
 953found:
 954    /* It is safe to write mru_block outside the iothread lock.  This
 955     * is what happens:
 956     *
 957     *     mru_block = xxx
 958     *     rcu_read_unlock()
 959     *                                        xxx removed from list
 960     *                  rcu_read_lock()
 961     *                  read mru_block
 962     *                                        mru_block = NULL;
 963     *                                        call_rcu(reclaim_ramblock, xxx);
 964     *                  rcu_read_unlock()
 965     *
 966     * atomic_rcu_set is not needed here.  The block was already published
 967     * when it was placed into the list.  Here we're just making an extra
 968     * copy of the pointer.
 969     */
 970    ram_list.mru_block = block;
 971    return block;
 972}
 973
 974static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 975{
 976    CPUState *cpu;
 977    ram_addr_t start1;
 978    RAMBlock *block;
 979    ram_addr_t end;
 980
 981    end = TARGET_PAGE_ALIGN(start + length);
 982    start &= TARGET_PAGE_MASK;
 983
 984    rcu_read_lock();
 985    block = qemu_get_ram_block(start);
 986    assert(block == qemu_get_ram_block(end - 1));
 987    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 988    CPU_FOREACH(cpu) {
 989        tlb_reset_dirty(cpu, start1, length);
 990    }
 991    rcu_read_unlock();
 992}
 993
 994/* Note: start and end must be within the same ram block.  */
 995bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 996                                              ram_addr_t length,
 997                                              unsigned client)
 998{
 999    DirtyMemoryBlocks *blocks;
1000    unsigned long end, page;
1001    bool dirty = false;
1002
1003    if (length == 0) {
1004        return false;
1005    }
1006
1007    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1008    page = start >> TARGET_PAGE_BITS;
1009
1010    rcu_read_lock();
1011
1012    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1013
1014    while (page < end) {
1015        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1016        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1017        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1018
1019        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1020                                              offset, num);
1021        page += num;
1022    }
1023
1024    rcu_read_unlock();
1025
1026    if (dirty && tcg_enabled()) {
1027        tlb_reset_dirty_range_all(start, length);
1028    }
1029
1030    return dirty;
1031}
1032
1033/* Called from RCU critical section */
1034hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1035                                       MemoryRegionSection *section,
1036                                       target_ulong vaddr,
1037                                       hwaddr paddr, hwaddr xlat,
1038                                       int prot,
1039                                       target_ulong *address)
1040{
1041    hwaddr iotlb;
1042    CPUWatchpoint *wp;
1043
1044    if (memory_region_is_ram(section->mr)) {
1045        /* Normal RAM.  */
1046        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1047            + xlat;
1048        if (!section->readonly) {
1049            iotlb |= PHYS_SECTION_NOTDIRTY;
1050        } else {
1051            iotlb |= PHYS_SECTION_ROM;
1052        }
1053    } else {
1054        AddressSpaceDispatch *d;
1055
1056        d = atomic_rcu_read(&section->address_space->dispatch);
1057        iotlb = section - d->map.sections;
1058        iotlb += xlat;
1059    }
1060
1061    /* Make accesses to pages with watchpoints go via the
1062       watchpoint trap routines.  */
1063    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1064        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1065            /* Avoid trapping reads of pages with a write breakpoint. */
1066            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1067                iotlb = PHYS_SECTION_WATCH + paddr;
1068                *address |= TLB_MMIO;
1069                break;
1070            }
1071        }
1072    }
1073
1074    return iotlb;
1075}
1076#endif /* defined(CONFIG_USER_ONLY) */
1077
1078#if !defined(CONFIG_USER_ONLY)
1079
1080static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1081                             uint16_t section);
1082static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1083
1084static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1085                               qemu_anon_ram_alloc;
1086
1087/*
1088 * Set a custom physical guest memory alloator.
1089 * Accelerators with unusual needs may need this.  Hopefully, we can
1090 * get rid of it eventually.
1091 */
1092void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1093{
1094    phys_mem_alloc = alloc;
1095}
1096
1097static uint16_t phys_section_add(PhysPageMap *map,
1098                                 MemoryRegionSection *section)
1099{
1100    /* The physical section number is ORed with a page-aligned
1101     * pointer to produce the iotlb entries.  Thus it should
1102     * never overflow into the page-aligned value.
1103     */
1104    assert(map->sections_nb < TARGET_PAGE_SIZE);
1105
1106    if (map->sections_nb == map->sections_nb_alloc) {
1107        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1108        map->sections = g_renew(MemoryRegionSection, map->sections,
1109                                map->sections_nb_alloc);
1110    }
1111    map->sections[map->sections_nb] = *section;
1112    memory_region_ref(section->mr);
1113    return map->sections_nb++;
1114}
1115
1116static void phys_section_destroy(MemoryRegion *mr)
1117{
1118    bool have_sub_page = mr->subpage;
1119
1120    memory_region_unref(mr);
1121
1122    if (have_sub_page) {
1123        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1124        object_unref(OBJECT(&subpage->iomem));
1125        g_free(subpage);
1126    }
1127}
1128
1129static void phys_sections_free(PhysPageMap *map)
1130{
1131    while (map->sections_nb > 0) {
1132        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1133        phys_section_destroy(section->mr);
1134    }
1135    g_free(map->sections);
1136    g_free(map->nodes);
1137}
1138
1139static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1140{
1141    subpage_t *subpage;
1142    hwaddr base = section->offset_within_address_space
1143        & TARGET_PAGE_MASK;
1144    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1145                                                   d->map.nodes, d->map.sections);
1146    MemoryRegionSection subsection = {
1147        .offset_within_address_space = base,
1148        .size = int128_make64(TARGET_PAGE_SIZE),
1149    };
1150    hwaddr start, end;
1151
1152    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1153
1154    if (!(existing->mr->subpage)) {
1155        subpage = subpage_init(d->as, base);
1156        subsection.address_space = d->as;
1157        subsection.mr = &subpage->iomem;
1158        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1159                      phys_section_add(&d->map, &subsection));
1160    } else {
1161        subpage = container_of(existing->mr, subpage_t, iomem);
1162    }
1163    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1164    end = start + int128_get64(section->size) - 1;
1165    subpage_register(subpage, start, end,
1166                     phys_section_add(&d->map, section));
1167}
1168
1169
1170static void register_multipage(AddressSpaceDispatch *d,
1171                               MemoryRegionSection *section)
1172{
1173    hwaddr start_addr = section->offset_within_address_space;
1174    uint16_t section_index = phys_section_add(&d->map, section);
1175    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1176                                                    TARGET_PAGE_BITS));
1177
1178    assert(num_pages);
1179    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1180}
1181
1182static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1183{
1184    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1185    AddressSpaceDispatch *d = as->next_dispatch;
1186    MemoryRegionSection now = *section, remain = *section;
1187    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1188
1189    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1191                       - now.offset_within_address_space;
1192
1193        now.size = int128_min(int128_make64(left), now.size);
1194        register_subpage(d, &now);
1195    } else {
1196        now.size = int128_zero();
1197    }
1198    while (int128_ne(remain.size, now.size)) {
1199        remain.size = int128_sub(remain.size, now.size);
1200        remain.offset_within_address_space += int128_get64(now.size);
1201        remain.offset_within_region += int128_get64(now.size);
1202        now = remain;
1203        if (int128_lt(remain.size, page_size)) {
1204            register_subpage(d, &now);
1205        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1206            now.size = page_size;
1207            register_subpage(d, &now);
1208        } else {
1209            now.size = int128_and(now.size, int128_neg(page_size));
1210            register_multipage(d, &now);
1211        }
1212    }
1213}
1214
1215void qemu_flush_coalesced_mmio_buffer(void)
1216{
1217    if (kvm_enabled())
1218        kvm_flush_coalesced_mmio_buffer();
1219}
1220
1221void qemu_mutex_lock_ramlist(void)
1222{
1223    qemu_mutex_lock(&ram_list.mutex);
1224}
1225
1226void qemu_mutex_unlock_ramlist(void)
1227{
1228    qemu_mutex_unlock(&ram_list.mutex);
1229}
1230
1231#ifdef __linux__
1232static void *file_ram_alloc(RAMBlock *block,
1233                            ram_addr_t memory,
1234                            const char *path,
1235                            Error **errp)
1236{
1237    bool unlink_on_error = false;
1238    char *filename;
1239    char *sanitized_name;
1240    char *c;
1241    void *area;
1242    int fd = -1;
1243    int64_t page_size;
1244
1245    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1246        error_setg(errp,
1247                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1248        return NULL;
1249    }
1250
1251    for (;;) {
1252        fd = open(path, O_RDWR);
1253        if (fd >= 0) {
1254            /* @path names an existing file, use it */
1255            break;
1256        }
1257        if (errno == ENOENT) {
1258            /* @path names a file that doesn't exist, create it */
1259            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1260            if (fd >= 0) {
1261                unlink_on_error = true;
1262                break;
1263            }
1264        } else if (errno == EISDIR) {
1265            /* @path names a directory, create a file there */
1266            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1267            sanitized_name = g_strdup(memory_region_name(block->mr));
1268            for (c = sanitized_name; *c != '\0'; c++) {
1269                if (*c == '/') {
1270                    *c = '_';
1271                }
1272            }
1273
1274            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1275                                       sanitized_name);
1276            g_free(sanitized_name);
1277
1278            fd = mkstemp(filename);
1279            if (fd >= 0) {
1280                unlink(filename);
1281                g_free(filename);
1282                break;
1283            }
1284            g_free(filename);
1285        }
1286        if (errno != EEXIST && errno != EINTR) {
1287            error_setg_errno(errp, errno,
1288                             "can't open backing store %s for guest RAM",
1289                             path);
1290            goto error;
1291        }
1292        /*
1293         * Try again on EINTR and EEXIST.  The latter happens when
1294         * something else creates the file between our two open().
1295         */
1296    }
1297
1298    page_size = qemu_fd_getpagesize(fd);
1299    block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1300
1301    if (memory < page_size) {
1302        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1303                   "or larger than page size 0x%" PRIx64,
1304                   memory, page_size);
1305        goto error;
1306    }
1307
1308    memory = ROUND_UP(memory, page_size);
1309
1310    /*
1311     * ftruncate is not supported by hugetlbfs in older
1312     * hosts, so don't bother bailing out on errors.
1313     * If anything goes wrong with it under other filesystems,
1314     * mmap will fail.
1315     */
1316    if (ftruncate(fd, memory)) {
1317        perror("ftruncate");
1318    }
1319
1320    area = qemu_ram_mmap(fd, memory, block->mr->align,
1321                         block->flags & RAM_SHARED);
1322    if (area == MAP_FAILED) {
1323        error_setg_errno(errp, errno,
1324                         "unable to map backing store for guest RAM");
1325        goto error;
1326    }
1327
1328    if (mem_prealloc) {
1329        os_mem_prealloc(fd, area, memory);
1330    }
1331
1332    block->fd = fd;
1333    return area;
1334
1335error:
1336    if (unlink_on_error) {
1337        unlink(path);
1338    }
1339    if (fd != -1) {
1340        close(fd);
1341    }
1342    return NULL;
1343}
1344#endif
1345
1346/* Called with the ramlist lock held.  */
1347static ram_addr_t find_ram_offset(ram_addr_t size)
1348{
1349    RAMBlock *block, *next_block;
1350    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1351
1352    assert(size != 0); /* it would hand out same offset multiple times */
1353
1354    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1355        return 0;
1356    }
1357
1358    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1359        ram_addr_t end, next = RAM_ADDR_MAX;
1360
1361        end = block->offset + block->max_length;
1362
1363        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1364            if (next_block->offset >= end) {
1365                next = MIN(next, next_block->offset);
1366            }
1367        }
1368        if (next - end >= size && next - end < mingap) {
1369            offset = end;
1370            mingap = next - end;
1371        }
1372    }
1373
1374    if (offset == RAM_ADDR_MAX) {
1375        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1376                (uint64_t)size);
1377        abort();
1378    }
1379
1380    return offset;
1381}
1382
1383ram_addr_t last_ram_offset(void)
1384{
1385    RAMBlock *block;
1386    ram_addr_t last = 0;
1387
1388    rcu_read_lock();
1389    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1390        last = MAX(last, block->offset + block->max_length);
1391    }
1392    rcu_read_unlock();
1393    return last;
1394}
1395
1396static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1397{
1398    int ret;
1399
1400    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1401    if (!machine_dump_guest_core(current_machine)) {
1402        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1403        if (ret) {
1404            perror("qemu_madvise");
1405            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1406                            "but dump_guest_core=off specified\n");
1407        }
1408    }
1409}
1410
1411/* Called within an RCU critical section, or while the ramlist lock
1412 * is held.
1413 */
1414static RAMBlock *find_ram_block(ram_addr_t addr)
1415{
1416    RAMBlock *block;
1417
1418    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1419        if (block->offset == addr) {
1420            return block;
1421        }
1422    }
1423
1424    return NULL;
1425}
1426
1427const char *qemu_ram_get_idstr(RAMBlock *rb)
1428{
1429    return rb->idstr;
1430}
1431
1432/* Called with iothread lock held.  */
1433void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1434{
1435    RAMBlock *new_block, *block;
1436
1437    rcu_read_lock();
1438    new_block = find_ram_block(addr);
1439    assert(new_block);
1440    assert(!new_block->idstr[0]);
1441
1442    if (dev) {
1443        char *id = qdev_get_dev_path(dev);
1444        if (id) {
1445            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1446            g_free(id);
1447        }
1448    }
1449    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1450
1451    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1452        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1453            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1454                    new_block->idstr);
1455            abort();
1456        }
1457    }
1458    rcu_read_unlock();
1459}
1460
1461/* Called with iothread lock held.  */
1462void qemu_ram_unset_idstr(ram_addr_t addr)
1463{
1464    RAMBlock *block;
1465
1466    /* FIXME: arch_init.c assumes that this is not called throughout
1467     * migration.  Ignore the problem since hot-unplug during migration
1468     * does not work anyway.
1469     */
1470
1471    rcu_read_lock();
1472    block = find_ram_block(addr);
1473    if (block) {
1474        memset(block->idstr, 0, sizeof(block->idstr));
1475    }
1476    rcu_read_unlock();
1477}
1478
1479static int memory_try_enable_merging(void *addr, size_t len)
1480{
1481    if (!machine_mem_merge(current_machine)) {
1482        /* disabled by the user */
1483        return 0;
1484    }
1485
1486    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1487}
1488
1489/* Only legal before guest might have detected the memory size: e.g. on
1490 * incoming migration, or right after reset.
1491 *
1492 * As memory core doesn't know how is memory accessed, it is up to
1493 * resize callback to update device state and/or add assertions to detect
1494 * misuse, if necessary.
1495 */
1496int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1497{
1498    RAMBlock *block = find_ram_block(base);
1499
1500    assert(block);
1501
1502    newsize = HOST_PAGE_ALIGN(newsize);
1503
1504    if (block->used_length == newsize) {
1505        return 0;
1506    }
1507
1508    if (!(block->flags & RAM_RESIZEABLE)) {
1509        error_setg_errno(errp, EINVAL,
1510                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1511                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1512                         newsize, block->used_length);
1513        return -EINVAL;
1514    }
1515
1516    if (block->max_length < newsize) {
1517        error_setg_errno(errp, EINVAL,
1518                         "Length too large: %s: 0x" RAM_ADDR_FMT
1519                         " > 0x" RAM_ADDR_FMT, block->idstr,
1520                         newsize, block->max_length);
1521        return -EINVAL;
1522    }
1523
1524    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1525    block->used_length = newsize;
1526    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1527                                        DIRTY_CLIENTS_ALL);
1528    memory_region_set_size(block->mr, newsize);
1529    if (block->resized) {
1530        block->resized(block->idstr, newsize, block->host);
1531    }
1532    return 0;
1533}
1534
1535/* Called with ram_list.mutex held */
1536static void dirty_memory_extend(ram_addr_t old_ram_size,
1537                                ram_addr_t new_ram_size)
1538{
1539    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1540                                             DIRTY_MEMORY_BLOCK_SIZE);
1541    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1542                                             DIRTY_MEMORY_BLOCK_SIZE);
1543    int i;
1544
1545    /* Only need to extend if block count increased */
1546    if (new_num_blocks <= old_num_blocks) {
1547        return;
1548    }
1549
1550    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1551        DirtyMemoryBlocks *old_blocks;
1552        DirtyMemoryBlocks *new_blocks;
1553        int j;
1554
1555        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1556        new_blocks = g_malloc(sizeof(*new_blocks) +
1557                              sizeof(new_blocks->blocks[0]) * new_num_blocks);
1558
1559        if (old_num_blocks) {
1560            memcpy(new_blocks->blocks, old_blocks->blocks,
1561                   old_num_blocks * sizeof(old_blocks->blocks[0]));
1562        }
1563
1564        for (j = old_num_blocks; j < new_num_blocks; j++) {
1565            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1566        }
1567
1568        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1569
1570        if (old_blocks) {
1571            g_free_rcu(old_blocks, rcu);
1572        }
1573    }
1574}
1575
1576static void ram_block_add(RAMBlock *new_block, Error **errp)
1577{
1578    RAMBlock *block;
1579    RAMBlock *last_block = NULL;
1580    ram_addr_t old_ram_size, new_ram_size;
1581    Error *err = NULL;
1582
1583    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1584
1585    qemu_mutex_lock_ramlist();
1586    new_block->offset = find_ram_offset(new_block->max_length);
1587
1588    if (!new_block->host) {
1589        if (xen_enabled()) {
1590            xen_ram_alloc(new_block->offset, new_block->max_length,
1591                          new_block->mr, &err);
1592            if (err) {
1593                error_propagate(errp, err);
1594                qemu_mutex_unlock_ramlist();
1595                return;
1596            }
1597        } else {
1598            new_block->host = phys_mem_alloc(new_block->max_length,
1599                                             &new_block->mr->align);
1600            if (!new_block->host) {
1601                error_setg_errno(errp, errno,
1602                                 "cannot set up guest memory '%s'",
1603                                 memory_region_name(new_block->mr));
1604                qemu_mutex_unlock_ramlist();
1605                return;
1606            }
1607            memory_try_enable_merging(new_block->host, new_block->max_length);
1608        }
1609    }
1610
1611    new_ram_size = MAX(old_ram_size,
1612              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1613    if (new_ram_size > old_ram_size) {
1614        migration_bitmap_extend(old_ram_size, new_ram_size);
1615        dirty_memory_extend(old_ram_size, new_ram_size);
1616    }
1617    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1618     * QLIST (which has an RCU-friendly variant) does not have insertion at
1619     * tail, so save the last element in last_block.
1620     */
1621    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1622        last_block = block;
1623        if (block->max_length < new_block->max_length) {
1624            break;
1625        }
1626    }
1627    if (block) {
1628        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1629    } else if (last_block) {
1630        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1631    } else { /* list is empty */
1632        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1633    }
1634    ram_list.mru_block = NULL;
1635
1636    /* Write list before version */
1637    smp_wmb();
1638    ram_list.version++;
1639    qemu_mutex_unlock_ramlist();
1640
1641    cpu_physical_memory_set_dirty_range(new_block->offset,
1642                                        new_block->used_length,
1643                                        DIRTY_CLIENTS_ALL);
1644
1645    if (new_block->host) {
1646        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1647        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1648        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1649        if (kvm_enabled()) {
1650            kvm_setup_guest_memory(new_block->host, new_block->max_length);
1651        }
1652    }
1653}
1654
1655#ifdef __linux__
1656RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1657                                   bool share, const char *mem_path,
1658                                   Error **errp)
1659{
1660    RAMBlock *new_block;
1661    Error *local_err = NULL;
1662
1663    if (xen_enabled()) {
1664        error_setg(errp, "-mem-path not supported with Xen");
1665        return NULL;
1666    }
1667
1668    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1669        /*
1670         * file_ram_alloc() needs to allocate just like
1671         * phys_mem_alloc, but we haven't bothered to provide
1672         * a hook there.
1673         */
1674        error_setg(errp,
1675                   "-mem-path not supported with this accelerator");
1676        return NULL;
1677    }
1678
1679    size = HOST_PAGE_ALIGN(size);
1680    new_block = g_malloc0(sizeof(*new_block));
1681    new_block->mr = mr;
1682    new_block->used_length = size;
1683    new_block->max_length = size;
1684    new_block->flags = share ? RAM_SHARED : 0;
1685    new_block->host = file_ram_alloc(new_block, size,
1686                                     mem_path, errp);
1687    if (!new_block->host) {
1688        g_free(new_block);
1689        return NULL;
1690    }
1691
1692    ram_block_add(new_block, &local_err);
1693    if (local_err) {
1694        g_free(new_block);
1695        error_propagate(errp, local_err);
1696        return NULL;
1697    }
1698    return new_block;
1699}
1700#endif
1701
1702static
1703RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1704                                  void (*resized)(const char*,
1705                                                  uint64_t length,
1706                                                  void *host),
1707                                  void *host, bool resizeable,
1708                                  MemoryRegion *mr, Error **errp)
1709{
1710    RAMBlock *new_block;
1711    Error *local_err = NULL;
1712
1713    size = HOST_PAGE_ALIGN(size);
1714    max_size = HOST_PAGE_ALIGN(max_size);
1715    new_block = g_malloc0(sizeof(*new_block));
1716    new_block->mr = mr;
1717    new_block->resized = resized;
1718    new_block->used_length = size;
1719    new_block->max_length = max_size;
1720    assert(max_size >= size);
1721    new_block->fd = -1;
1722    new_block->host = host;
1723    if (host) {
1724        new_block->flags |= RAM_PREALLOC;
1725    }
1726    if (resizeable) {
1727        new_block->flags |= RAM_RESIZEABLE;
1728    }
1729    ram_block_add(new_block, &local_err);
1730    if (local_err) {
1731        g_free(new_block);
1732        error_propagate(errp, local_err);
1733        return NULL;
1734    }
1735    return new_block;
1736}
1737
1738RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1739                                   MemoryRegion *mr, Error **errp)
1740{
1741    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1742}
1743
1744RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1745{
1746    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1747}
1748
1749RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1750                                     void (*resized)(const char*,
1751                                                     uint64_t length,
1752                                                     void *host),
1753                                     MemoryRegion *mr, Error **errp)
1754{
1755    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1756}
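/*
 * Illustrative sketch (not part of this file): the "resized" callback passed
 * to qemu_ram_alloc_resizeable() is invoked with the block's name, its new
 * length and the host pointer.  A hypothetical caller could look like:
 *
 *     static void my_ram_resized(const char *id, uint64_t length, void *host)
 *     {
 *         fprintf(stderr, "block %s resized to %" PRIu64 " bytes\n",
 *                 id, length);
 *     }
 *
 *     block = qemu_ram_alloc_resizeable(16 * 1024 * 1024, 64 * 1024 * 1024,
 *                                       my_ram_resized, mr, &err);
 */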
1757
1758static void reclaim_ramblock(RAMBlock *block)
1759{
1760    if (block->flags & RAM_PREALLOC) {
1761        ;
1762    } else if (xen_enabled()) {
1763        xen_invalidate_map_cache_entry(block->host);
1764#ifndef _WIN32
1765    } else if (block->fd >= 0) {
1766        qemu_ram_munmap(block->host, block->max_length);
1767        close(block->fd);
1768#endif
1769    } else {
1770        qemu_anon_ram_free(block->host, block->max_length);
1771    }
1772    g_free(block);
1773}
1774
1775void qemu_ram_free(RAMBlock *block)
1776{
1777    if (!block) {
1778        return;
1779    }
1780
1781    qemu_mutex_lock_ramlist();
1782    QLIST_REMOVE_RCU(block, next);
1783    ram_list.mru_block = NULL;
1784    /* Write list before version */
1785    smp_wmb();
1786    ram_list.version++;
1787    call_rcu(block, reclaim_ramblock, rcu);
1788    qemu_mutex_unlock_ramlist();
1789}
1790
1791#ifndef _WIN32
1792void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1793{
1794    RAMBlock *block;
1795    ram_addr_t offset;
1796    int flags;
1797    void *area, *vaddr;
1798
1799    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1800        offset = addr - block->offset;
1801        if (offset < block->max_length) {
1802            vaddr = ramblock_ptr(block, offset);
1803            if (block->flags & RAM_PREALLOC) {
1804                ;
1805            } else if (xen_enabled()) {
1806                abort();
1807            } else {
1808                flags = MAP_FIXED;
1809                if (block->fd >= 0) {
1810                    flags |= (block->flags & RAM_SHARED ?
1811                              MAP_SHARED : MAP_PRIVATE);
1812                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1813                                flags, block->fd, offset);
1814                } else {
1815                    /*
1816                     * Remap needs to match alloc.  Accelerators that
1817                     * set phys_mem_alloc never remap.  If they did,
1818                     * we'd need a remap hook here.
1819                     */
1820                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1821
1822                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1823                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1824                                flags, -1, 0);
1825                }
1826                if (area != vaddr) {
1827                    fprintf(stderr, "Could not remap addr: "
1828                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1829                            length, addr);
1830                    exit(1);
1831                }
1832                memory_try_enable_merging(vaddr, length);
1833                qemu_ram_setup_dump(vaddr, length);
1834            }
1835        }
1836    }
1837}
1838#endif /* !_WIN32 */
1839
1840int qemu_get_ram_fd(ram_addr_t addr)
1841{
1842    RAMBlock *block;
1843    int fd;
1844
1845    rcu_read_lock();
1846    block = qemu_get_ram_block(addr);
1847    fd = block->fd;
1848    rcu_read_unlock();
1849    return fd;
1850}
1851
1852void qemu_set_ram_fd(ram_addr_t addr, int fd)
1853{
1854    RAMBlock *block;
1855
1856    rcu_read_lock();
1857    block = qemu_get_ram_block(addr);
1858    block->fd = fd;
1859    rcu_read_unlock();
1860}
1861
1862void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1863{
1864    RAMBlock *block;
1865    void *ptr;
1866
1867    rcu_read_lock();
1868    block = qemu_get_ram_block(addr);
1869    ptr = ramblock_ptr(block, 0);
1870    rcu_read_unlock();
1871    return ptr;
1872}
1873
1874/* Return a host pointer to ram allocated with qemu_ram_alloc.
1875 * This should not be used for general purpose DMA.  Use address_space_map
1876 * or address_space_rw instead. For local memory (e.g. video ram) that the
1877 * device owns, use memory_region_get_ram_ptr.
1878 *
1879 * Called within RCU critical section.
1880 */
1881void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1882{
1883    RAMBlock *block = ram_block;
1884
1885    if (block == NULL) {
1886        block = qemu_get_ram_block(addr);
1887    }
1888
1889    if (xen_enabled() && block->host == NULL) {
1890        /* We need to check if the requested address is in RAM
1891         * because we don't want to map the entire memory in QEMU.
1892         * In that case just map until the end of the page.
1893         */
1894        if (block->offset == 0) {
1895            return xen_map_cache(addr, 0, 0);
1896        }
1897
1898        block->host = xen_map_cache(block->offset, block->max_length, 1);
1899    }
1900    return ramblock_ptr(block, addr - block->offset);
1901}
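/*
 * Illustrative sketch (not part of this file): as noted above,
 * qemu_get_ram_ptr() must be called inside an RCU critical section; passing
 * NULL as the block makes it look the RAMBlock up by address.  "ram_addr",
 * "data" and "len" are placeholders.
 *
 *     rcu_read_lock();
 *     ptr = qemu_get_ram_ptr(NULL, ram_addr);
 *     memcpy(ptr, data, len);
 *     rcu_read_unlock();
 */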
1902
1903/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1904 * but takes a size argument.
1905 *
1906 * Called within RCU critical section.
1907 */
1908static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1909                                 hwaddr *size)
1910{
1911    RAMBlock *block = ram_block;
1912    ram_addr_t offset_inside_block;
1913    if (*size == 0) {
1914        return NULL;
1915    }
1916
1917    if (block == NULL) {
1918        block = qemu_get_ram_block(addr);
1919    }
1920    offset_inside_block = addr - block->offset;
1921    *size = MIN(*size, block->max_length - offset_inside_block);
1922
1923    if (xen_enabled() && block->host == NULL) {
1924        /* We need to check if the requested address is in RAM
1925         * because we don't want to map the entire memory in QEMU.
1926         * In that case just map the requested area.
1927         */
1928        if (block->offset == 0) {
1929            return xen_map_cache(addr, *size, 1);
1930        }
1931
1932        block->host = xen_map_cache(block->offset, block->max_length, 1);
1933    }
1934
1935    return ramblock_ptr(block, offset_inside_block);
1936}
1937
1938/*
1939 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1940 * in that RAMBlock.
1941 *
1942 * ptr: Host pointer to look up
1943 * round_offset: If true round the result offset down to a page boundary
1944 * *ram_addr: set to result ram_addr
1945 * *offset: set to result offset within the RAMBlock
1946 *
1947 * Returns: RAMBlock (or NULL if not found)
1948 *
1949 * By the time this function returns, the returned pointer is not protected
1950 * by RCU anymore.  If the caller is not within an RCU critical section and
1951 * does not hold the iothread lock, it must have other means of protecting the
1952 * pointer, such as a reference to the region that includes the incoming
1953 * ram_addr_t.
1954 */
1955RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1956                                   ram_addr_t *ram_addr,
1957                                   ram_addr_t *offset)
1958{
1959    RAMBlock *block;
1960    uint8_t *host = ptr;
1961
1962    if (xen_enabled()) {
1963        rcu_read_lock();
1964        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1965        block = qemu_get_ram_block(*ram_addr);
1966        if (block) {
1967            *offset = (host - block->host);
1968        }
1969        rcu_read_unlock();
1970        return block;
1971    }
1972
1973    rcu_read_lock();
1974    block = atomic_rcu_read(&ram_list.mru_block);
1975    if (block && block->host && host - block->host < block->max_length) {
1976        goto found;
1977    }
1978
1979    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1980        /* This case happens when the block is not mapped. */
1981        if (block->host == NULL) {
1982            continue;
1983        }
1984        if (host - block->host < block->max_length) {
1985            goto found;
1986        }
1987    }
1988
1989    rcu_read_unlock();
1990    return NULL;
1991
1992found:
1993    *offset = (host - block->host);
1994    if (round_offset) {
1995        *offset &= TARGET_PAGE_MASK;
1996    }
1997    *ram_addr = block->offset + *offset;
1998    rcu_read_unlock();
1999    return block;
2000}
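/*
 * Illustrative sketch (not part of this file): translating a host pointer
 * that points into guest RAM back to its block and ram_addr; a NULL return
 * means the pointer is not inside any registered RAM block.  "host_ptr" is
 * a placeholder.
 *
 *     RAMBlock *rb;
 *     ram_addr_t ram_addr, offset;
 *
 *     rb = qemu_ram_block_from_host(host_ptr, false, &ram_addr, &offset);
 *     if (!rb) {
 *         return;
 *     }
 */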
2001
2002/*
2003 * Finds the named RAMBlock
2004 *
2005 * name: The name of RAMBlock to find
2006 *
2007 * Returns: RAMBlock (or NULL if not found)
2008 */
2009RAMBlock *qemu_ram_block_by_name(const char *name)
2010{
2011    RAMBlock *block;
2012
2013    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2014        if (!strcmp(name, block->idstr)) {
2015            return block;
2016        }
2017    }
2018
2019    return NULL;
2020}
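/*
 * Illustrative sketch (not part of this file): looking a block up by its
 * idstr; the name used here is only an example.
 *
 *     RAMBlock *rb = qemu_ram_block_by_name("pc.ram");
 *     if (!rb) {
 *         error_report("no RAM block named pc.ram");
 *     }
 */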
2021
2022/* Some of the softmmu routines need to translate from a host pointer
2023   (typically a TLB entry) back to a ram offset.  */
2024MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2025{
2026    RAMBlock *block;
2027    ram_addr_t offset; /* Not used */
2028
2029    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2030
2031    if (!block) {
2032        return NULL;
2033    }
2034
2035    return block->mr;
2036}
2037
2038/* Called within RCU critical section.  */
2039static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2040                               uint64_t val, unsigned size)
2041{
2042    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2043        tb_invalidate_phys_page_fast(ram_addr, size);
2044    }
2045    switch (size) {
2046    case 1:
2047        stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2048        break;
2049    case 2:
2050        stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2051        break;
2052    case 4:
2053        stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2054        break;
2055    default:
2056        abort();
2057    }
2058    /* Set both VGA and migration bits for simplicity and to remove
2059     * the notdirty callback faster.
2060     */
2061    cpu_physical_memory_set_dirty_range(ram_addr, size,
2062                                        DIRTY_CLIENTS_NOCODE);
2063    /* we remove the notdirty callback only if the code has been
2064       flushed */
2065    if (!cpu_physical_memory_is_clean(ram_addr)) {
2066        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2067    }
2068}
2069
2070static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2071                                 unsigned size, bool is_write)
2072{
2073    return is_write;
2074}
2075
2076static const MemoryRegionOps notdirty_mem_ops = {
2077    .write = notdirty_mem_write,
2078    .valid.accepts = notdirty_mem_accepts,
2079    .endianness = DEVICE_NATIVE_ENDIAN,
2080};
2081
2082/* Generate a debug exception if a watchpoint has been hit.  */
2083static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2084{
2085    CPUState *cpu = current_cpu;
2086    CPUClass *cc = CPU_GET_CLASS(cpu);
2087    CPUArchState *env = cpu->env_ptr;
2088    target_ulong pc, cs_base;
2089    target_ulong vaddr;
2090    CPUWatchpoint *wp;
2091    int cpu_flags;
2092
2093    if (cpu->watchpoint_hit) {
2094        /* We re-entered the check after replacing the TB. Now raise
2095         * the debug interrupt so that it will trigger after the
2096         * current instruction. */
2097        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2098        return;
2099    }
2100    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2101    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2102        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2103            && (wp->flags & flags)) {
2104            if (flags == BP_MEM_READ) {
2105                wp->flags |= BP_WATCHPOINT_HIT_READ;
2106            } else {
2107                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2108            }
2109            wp->hitaddr = vaddr;
2110            wp->hitattrs = attrs;
2111            if (!cpu->watchpoint_hit) {
2112                if (wp->flags & BP_CPU &&
2113                    !cc->debug_check_watchpoint(cpu, wp)) {
2114                    wp->flags &= ~BP_WATCHPOINT_HIT;
2115                    continue;
2116                }
2117                cpu->watchpoint_hit = wp;
2118                tb_check_watchpoint(cpu);
2119                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2120                    cpu->exception_index = EXCP_DEBUG;
2121                    cpu_loop_exit(cpu);
2122                } else {
2123                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2124                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2125                    cpu_resume_from_signal(cpu, NULL);
2126                }
2127            }
2128        } else {
2129            wp->flags &= ~BP_WATCHPOINT_HIT;
2130        }
2131    }
2132}
2133
2134/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2135   so these check for a hit then pass through to the normal out-of-line
2136   phys routines.  */
2137static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2138                                  unsigned size, MemTxAttrs attrs)
2139{
2140    MemTxResult res;
2141    uint64_t data;
2142    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2143    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2144
2145    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2146    switch (size) {
2147    case 1:
2148        data = address_space_ldub(as, addr, attrs, &res);
2149        break;
2150    case 2:
2151        data = address_space_lduw(as, addr, attrs, &res);
2152        break;
2153    case 4:
2154        data = address_space_ldl(as, addr, attrs, &res);
2155        break;
2156    default: abort();
2157    }
2158    *pdata = data;
2159    return res;
2160}
2161
2162static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2163                                   uint64_t val, unsigned size,
2164                                   MemTxAttrs attrs)
2165{
2166    MemTxResult res;
2167    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2168    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2169
2170    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2171    switch (size) {
2172    case 1:
2173        address_space_stb(as, addr, val, attrs, &res);
2174        break;
2175    case 2:
2176        address_space_stw(as, addr, val, attrs, &res);
2177        break;
2178    case 4:
2179        address_space_stl(as, addr, val, attrs, &res);
2180        break;
2181    default: abort();
2182    }
2183    return res;
2184}
2185
2186static const MemoryRegionOps watch_mem_ops = {
2187    .read_with_attrs = watch_mem_read,
2188    .write_with_attrs = watch_mem_write,
2189    .endianness = DEVICE_NATIVE_ENDIAN,
2190};
2191
2192static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2193                                unsigned len, MemTxAttrs attrs)
2194{
2195    subpage_t *subpage = opaque;
2196    uint8_t buf[8];
2197    MemTxResult res;
2198
2199#if defined(DEBUG_SUBPAGE)
2200    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2201           subpage, len, addr);
2202#endif
2203    res = address_space_read(subpage->as, addr + subpage->base,
2204                             attrs, buf, len);
2205    if (res) {
2206        return res;
2207    }
2208    switch (len) {
2209    case 1:
2210        *data = ldub_p(buf);
2211        return MEMTX_OK;
2212    case 2:
2213        *data = lduw_p(buf);
2214        return MEMTX_OK;
2215    case 4:
2216        *data = ldl_p(buf);
2217        return MEMTX_OK;
2218    case 8:
2219        *data = ldq_p(buf);
2220        return MEMTX_OK;
2221    default:
2222        abort();
2223    }
2224}
2225
2226static MemTxResult subpage_write(void *opaque, hwaddr addr,
2227                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2228{
2229    subpage_t *subpage = opaque;
2230    uint8_t buf[8];
2231
2232#if defined(DEBUG_SUBPAGE)
2233    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2234           " value %"PRIx64"\n",
2235           __func__, subpage, len, addr, value);
2236#endif
2237    switch (len) {
2238    case 1:
2239        stb_p(buf, value);
2240        break;
2241    case 2:
2242        stw_p(buf, value);
2243        break;
2244    case 4:
2245        stl_p(buf, value);
2246        break;
2247    case 8:
2248        stq_p(buf, value);
2249        break;
2250    default:
2251        abort();
2252    }
2253    return address_space_write(subpage->as, addr + subpage->base,
2254                               attrs, buf, len);
2255}
2256
2257static bool subpage_accepts(void *opaque, hwaddr addr,
2258                            unsigned len, bool is_write)
2259{
2260    subpage_t *subpage = opaque;
2261#if defined(DEBUG_SUBPAGE)
2262    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2263           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2264#endif
2265
2266    return address_space_access_valid(subpage->as, addr + subpage->base,
2267                                      len, is_write);
2268}
2269
2270static const MemoryRegionOps subpage_ops = {
2271    .read_with_attrs = subpage_read,
2272    .write_with_attrs = subpage_write,
2273    .impl.min_access_size = 1,
2274    .impl.max_access_size = 8,
2275    .valid.min_access_size = 1,
2276    .valid.max_access_size = 8,
2277    .valid.accepts = subpage_accepts,
2278    .endianness = DEVICE_NATIVE_ENDIAN,
2279};
2280
2281static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2282                             uint16_t section)
2283{
2284    int idx, eidx;
2285
2286    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2287        return -1;
2288    idx = SUBPAGE_IDX(start);
2289    eidx = SUBPAGE_IDX(end);
2290#if defined(DEBUG_SUBPAGE)
2291    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2292           __func__, mmio, start, end, idx, eidx, section);
2293#endif
2294    for (; idx <= eidx; idx++) {
2295        mmio->sub_section[idx] = section;
2296    }
2297
2298    return 0;
2299}
2300
2301static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2302{
2303    subpage_t *mmio;
2304
2305    mmio = g_malloc0(sizeof(subpage_t));
2306
2307    mmio->as = as;
2308    mmio->base = base;
2309    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2310                          NULL, TARGET_PAGE_SIZE);
2311    mmio->iomem.subpage = true;
2312#if defined(DEBUG_SUBPAGE)
2313    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2314           mmio, base, TARGET_PAGE_SIZE);
2315#endif
2316    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2317
2318    return mmio;
2319}
2320
2321static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2322                              MemoryRegion *mr)
2323{
2324    assert(as);
2325    MemoryRegionSection section = {
2326        .address_space = as,
2327        .mr = mr,
2328        .offset_within_address_space = 0,
2329        .offset_within_region = 0,
2330        .size = int128_2_64(),
2331    };
2332
2333    return phys_section_add(map, &section);
2334}
2335
2336MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2337{
2338    int asidx = cpu_asidx_from_attrs(cpu, attrs);
2339    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2340    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2341    MemoryRegionSection *sections = d->map.sections;
2342
2343    return sections[index & ~TARGET_PAGE_MASK].mr;
2344}
2345
2346static void io_mem_init(void)
2347{
2348    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2349    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2350                          NULL, UINT64_MAX);
2351    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2352                          NULL, UINT64_MAX);
2353    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2354                          NULL, UINT64_MAX);
2355}
2356
2357static void mem_begin(MemoryListener *listener)
2358{
2359    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2360    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2361    uint16_t n;
2362
2363    n = dummy_section(&d->map, as, &io_mem_unassigned);
2364    assert(n == PHYS_SECTION_UNASSIGNED);
2365    n = dummy_section(&d->map, as, &io_mem_notdirty);
2366    assert(n == PHYS_SECTION_NOTDIRTY);
2367    n = dummy_section(&d->map, as, &io_mem_rom);
2368    assert(n == PHYS_SECTION_ROM);
2369    n = dummy_section(&d->map, as, &io_mem_watch);
2370    assert(n == PHYS_SECTION_WATCH);
2371
2372    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2373    d->as = as;
2374    as->next_dispatch = d;
2375}
2376
2377static void address_space_dispatch_free(AddressSpaceDispatch *d)
2378{
2379    phys_sections_free(&d->map);
2380    g_free(d);
2381}
2382
2383static void mem_commit(MemoryListener *listener)
2384{
2385    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2386    AddressSpaceDispatch *cur = as->dispatch;
2387    AddressSpaceDispatch *next = as->next_dispatch;
2388
2389    phys_page_compact_all(next, next->map.nodes_nb);
2390
2391    atomic_rcu_set(&as->dispatch, next);
2392    if (cur) {
2393        call_rcu(cur, address_space_dispatch_free, rcu);
2394    }
2395}
2396
2397static void tcg_commit(MemoryListener *listener)
2398{
2399    CPUAddressSpace *cpuas;
2400    AddressSpaceDispatch *d;
2401
2402    /* since each CPU stores ram addresses in its TLB cache, we must
2403       reset the modified entries */
2404    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2405    cpu_reloading_memory_map();
2406    /* The CPU and TLB are protected by the iothread lock.
2407     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2408     * may have split the RCU critical section.
2409     */
2410    d = atomic_rcu_read(&cpuas->as->dispatch);
2411    cpuas->memory_dispatch = d;
2412    tlb_flush(cpuas->cpu, 1);
2413}
2414
2415void address_space_init_dispatch(AddressSpace *as)
2416{
2417    as->dispatch = NULL;
2418    as->dispatch_listener = (MemoryListener) {
2419        .begin = mem_begin,
2420        .commit = mem_commit,
2421        .region_add = mem_add,
2422        .region_nop = mem_add,
2423        .priority = 0,
2424    };
2425    memory_listener_register(&as->dispatch_listener, as);
2426}
2427
2428void address_space_unregister(AddressSpace *as)
2429{
2430    memory_listener_unregister(&as->dispatch_listener);
2431}
2432
2433void address_space_destroy_dispatch(AddressSpace *as)
2434{
2435    AddressSpaceDispatch *d = as->dispatch;
2436
2437    atomic_rcu_set(&as->dispatch, NULL);
2438    if (d) {
2439        call_rcu(d, address_space_dispatch_free, rcu);
2440    }
2441}
2442
2443static void memory_map_init(void)
2444{
2445    system_memory = g_malloc(sizeof(*system_memory));
2446
2447    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2448    address_space_init(&address_space_memory, system_memory, "memory");
2449
2450    system_io = g_malloc(sizeof(*system_io));
2451    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2452                          65536);
2453    address_space_init(&address_space_io, system_io, "I/O");
2454}
2455
2456MemoryRegion *get_system_memory(void)
2457{
2458    return system_memory;
2459}
2460
2461MemoryRegion *get_system_io(void)
2462{
2463    return system_io;
2464}
2465
2466#endif /* !defined(CONFIG_USER_ONLY) */
2467
2468/* physical memory access (slow version, mainly for debug) */
2469#if defined(CONFIG_USER_ONLY)
2470int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2471                        uint8_t *buf, int len, int is_write)
2472{
2473    int l, flags;
2474    target_ulong page;
2475    void * p;
2476
2477    while (len > 0) {
2478        page = addr & TARGET_PAGE_MASK;
2479        l = (page + TARGET_PAGE_SIZE) - addr;
2480        if (l > len)
2481            l = len;
2482        flags = page_get_flags(page);
2483        if (!(flags & PAGE_VALID))
2484            return -1;
2485        if (is_write) {
2486            if (!(flags & PAGE_WRITE))
2487                return -1;
2488            /* XXX: this code should not depend on lock_user */
2489            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2490                return -1;
2491            memcpy(p, buf, l);
2492            unlock_user(p, addr, l);
2493        } else {
2494            if (!(flags & PAGE_READ))
2495                return -1;
2496            /* XXX: this code should not depend on lock_user */
2497            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2498                return -1;
2499            memcpy(buf, p, l);
2500            unlock_user(p, addr, 0);
2501        }
2502        len -= l;
2503        buf += l;
2504        addr += l;
2505    }
2506    return 0;
2507}
2508
2509#else
2510
2511static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2512                                     hwaddr length)
2513{
2514    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2515    /* No early return if dirty_log_mask is or becomes 0, because
2516     * cpu_physical_memory_set_dirty_range will still call
2517     * xen_modified_memory.
2518     */
2519    if (dirty_log_mask) {
2520        dirty_log_mask =
2521            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2522    }
2523    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2524        tb_invalidate_phys_range(addr, addr + length);
2525        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2526    }
2527    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2528}
2529
2530static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2531{
2532    unsigned access_size_max = mr->ops->valid.max_access_size;
2533
2534    /* Regions are assumed to support 1-4 byte accesses unless
2535       otherwise specified.  */
2536    if (access_size_max == 0) {
2537        access_size_max = 4;
2538    }
2539
2540    /* Bound the maximum access by the alignment of the address.  */
2541    if (!mr->ops->impl.unaligned) {
2542        unsigned align_size_max = addr & -addr;
2543        if (align_size_max != 0 && align_size_max < access_size_max) {
2544            access_size_max = align_size_max;
2545        }
2546    }
2547
2548    /* Don't attempt accesses larger than the maximum.  */
2549    if (l > access_size_max) {
2550        l = access_size_max;
2551    }
2552    l = pow2floor(l);
2553
2554    return l;
2555}
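/*
 * Worked example (illustrative, not upstream text): for a 4-byte request at
 * addr == 0x1006 on a region without impl.unaligned, addr & -addr == 2, so
 * access_size_max is clamped to 2 and pow2floor() keeps l at 2; the caller
 * then issues the remaining bytes as further accesses.
 */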
2556
2557static bool prepare_mmio_access(MemoryRegion *mr)
2558{
2559    bool unlocked = !qemu_mutex_iothread_locked();
2560    bool release_lock = false;
2561
2562    if (unlocked && mr->global_locking) {
2563        qemu_mutex_lock_iothread();
2564        unlocked = false;
2565        release_lock = true;
2566    }
2567    if (mr->flush_coalesced_mmio) {
2568        if (unlocked) {
2569            qemu_mutex_lock_iothread();
2570        }
2571        qemu_flush_coalesced_mmio_buffer();
2572        if (unlocked) {
2573            qemu_mutex_unlock_iothread();
2574        }
2575    }
2576
2577    return release_lock;
2578}
2579
2580/* Called within RCU critical section.  */
2581static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2582                                                MemTxAttrs attrs,
2583                                                const uint8_t *buf,
2584                                                int len, hwaddr addr1,
2585                                                hwaddr l, MemoryRegion *mr)
2586{
2587    uint8_t *ptr;
2588    uint64_t val;
2589    MemTxResult result = MEMTX_OK;
2590    bool release_lock = false;
2591
2592    for (;;) {
2593        if (!memory_access_is_direct(mr, true)) {
2594            release_lock |= prepare_mmio_access(mr);
2595            l = memory_access_size(mr, l, addr1);
2596            /* XXX: could force current_cpu to NULL to avoid
2597               potential bugs */
2598            switch (l) {
2599            case 8:
2600                /* 64 bit write access */
2601                val = ldq_p(buf);
2602                result |= memory_region_dispatch_write(mr, addr1, val, 8,
2603                                                       attrs);
2604                break;
2605            case 4:
2606                /* 32 bit write access */
2607                val = ldl_p(buf);
2608                result |= memory_region_dispatch_write(mr, addr1, val, 4,
2609                                                       attrs);
2610                break;
2611            case 2:
2612                /* 16 bit write access */
2613                val = lduw_p(buf);
2614                result |= memory_region_dispatch_write(mr, addr1, val, 2,
2615                                                       attrs);
2616                break;
2617            case 1:
2618                /* 8 bit write access */
2619                val = ldub_p(buf);
2620                result |= memory_region_dispatch_write(mr, addr1, val, 1,
2621                                                       attrs);
2622                break;
2623            default:
2624                abort();
2625            }
2626        } else {
2627            addr1 += memory_region_get_ram_addr(mr);
2628            /* RAM case */
2629            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2630            memcpy(ptr, buf, l);
2631            invalidate_and_set_dirty(mr, addr1, l);
2632        }
2633
2634        if (release_lock) {
2635            qemu_mutex_unlock_iothread();
2636            release_lock = false;
2637        }
2638
2639        len -= l;
2640        buf += l;
2641        addr += l;
2642
2643        if (!len) {
2644            break;
2645        }
2646
2647        l = len;
2648        mr = address_space_translate(as, addr, &addr1, &l, true);
2649    }
2650
2651    return result;
2652}
2653
2654MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2655                                const uint8_t *buf, int len)
2656{
2657    hwaddr l;
2658    hwaddr addr1;
2659    MemoryRegion *mr;
2660    MemTxResult result = MEMTX_OK;
2661
2662    if (len > 0) {
2663        rcu_read_lock();
2664        l = len;
2665        mr = address_space_translate(as, addr, &addr1, &l, true);
2666        result = address_space_write_continue(as, addr, attrs, buf, len,
2667                                              addr1, l, mr);
2668        rcu_read_unlock();
2669    }
2670
2671    return result;
2672}
2673
2674/* Called within RCU critical section.  */
2675MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2676                                        MemTxAttrs attrs, uint8_t *buf,
2677                                        int len, hwaddr addr1, hwaddr l,
2678                                        MemoryRegion *mr)
2679{
2680    uint8_t *ptr;
2681    uint64_t val;
2682    MemTxResult result = MEMTX_OK;
2683    bool release_lock = false;
2684
2685    for (;;) {
2686        if (!memory_access_is_direct(mr, false)) {
2687            /* I/O case */
2688            release_lock |= prepare_mmio_access(mr);
2689            l = memory_access_size(mr, l, addr1);
2690            switch (l) {
2691            case 8:
2692                /* 64 bit read access */
2693                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2694                                                      attrs);
2695                stq_p(buf, val);
2696                break;
2697            case 4:
2698                /* 32 bit read access */
2699                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2700                                                      attrs);
2701                stl_p(buf, val);
2702                break;
2703            case 2:
2704                /* 16 bit read access */
2705                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2706                                                      attrs);
2707                stw_p(buf, val);
2708                break;
2709            case 1:
2710                /* 8 bit read access */
2711                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2712                                                      attrs);
2713                stb_p(buf, val);
2714                break;
2715            default:
2716                abort();
2717            }
2718        } else {
2719            /* RAM case */
2720            ptr = qemu_get_ram_ptr(mr->ram_block,
2721                                   memory_region_get_ram_addr(mr) + addr1);
2722            memcpy(buf, ptr, l);
2723        }
2724
2725        if (release_lock) {
2726            qemu_mutex_unlock_iothread();
2727            release_lock = false;
2728        }
2729
2730        len -= l;
2731        buf += l;
2732        addr += l;
2733
2734        if (!len) {
2735            break;
2736        }
2737
2738        l = len;
2739        mr = address_space_translate(as, addr, &addr1, &l, false);
2740    }
2741
2742    return result;
2743}
2744
2745MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2746                                    MemTxAttrs attrs, uint8_t *buf, int len)
2747{
2748    hwaddr l;
2749    hwaddr addr1;
2750    MemoryRegion *mr;
2751    MemTxResult result = MEMTX_OK;
2752
2753    if (len > 0) {
2754        rcu_read_lock();
2755        l = len;
2756        mr = address_space_translate(as, addr, &addr1, &l, false);
2757        result = address_space_read_continue(as, addr, attrs, buf, len,
2758                                             addr1, l, mr);
2759        rcu_read_unlock();
2760    }
2761
2762    return result;
2763}
2764
2765MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2766                             uint8_t *buf, int len, bool is_write)
2767{
2768    if (is_write) {
2769        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2770    } else {
2771        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2772    }
2773}
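/*
 * Illustrative usage sketch (not part of this file): reading guest-physical
 * memory through the system memory address space; the guest address is a
 * placeholder.
 *
 *     uint8_t buf[16];
 *     MemTxResult res;
 *
 *     res = address_space_rw(&address_space_memory, 0x1000,
 *                            MEMTXATTRS_UNSPECIFIED, buf, sizeof(buf),
 *                            false);
 *     if (res != MEMTX_OK) {
 *         error_report("guest memory read failed");
 *     }
 */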
2774
2775void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2776                            int len, int is_write)
2777{
2778    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2779                     buf, len, is_write);
2780}
2781
2782enum write_rom_type {
2783    WRITE_DATA,
2784    FLUSH_CACHE,
2785};
2786
2787static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2788    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2789{
2790    hwaddr l;
2791    uint8_t *ptr;
2792    hwaddr addr1;
2793    MemoryRegion *mr;
2794
2795    rcu_read_lock();
2796    while (len > 0) {
2797        l = len;
2798        mr = address_space_translate(as, addr, &addr1, &l, true);
2799
2800        if (!(memory_region_is_ram(mr) ||
2801              memory_region_is_romd(mr))) {
2802            l = memory_access_size(mr, l, addr1);
2803        } else {
2804            addr1 += memory_region_get_ram_addr(mr);
2805            /* ROM/RAM case */
2806            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2807            switch (type) {
2808            case WRITE_DATA:
2809                memcpy(ptr, buf, l);
2810                invalidate_and_set_dirty(mr, addr1, l);
2811                break;
2812            case FLUSH_CACHE:
2813                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2814                break;
2815            }
2816        }
2817        len -= l;
2818        buf += l;
2819        addr += l;
2820    }
2821    rcu_read_unlock();
2822}
2823
2824/* used for ROM loading: can write in RAM and ROM */
2825void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2826                                   const uint8_t *buf, int len)
2827{
2828    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2829}
2830
2831void cpu_flush_icache_range(hwaddr start, int len)
2832{
2833    /*
2834     * This function should do the same thing as an icache flush that was
2835     * triggered from within the guest. For TCG we are always cache coherent,
2836     * so there is no need to flush anything. For KVM / Xen we need to flush
2837     * the host's instruction cache at least.
2838     */
2839    if (tcg_enabled()) {
2840        return;
2841    }
2842
2843    cpu_physical_memory_write_rom_internal(&address_space_memory,
2844                                           start, NULL, len, FLUSH_CACHE);
2845}
2846
2847typedef struct {
2848    MemoryRegion *mr;
2849    void *buffer;
2850    hwaddr addr;
2851    hwaddr len;
2852    bool in_use;
2853} BounceBuffer;
2854
2855static BounceBuffer bounce;
2856
2857typedef struct MapClient {
2858    QEMUBH *bh;
2859    QLIST_ENTRY(MapClient) link;
2860} MapClient;
2861
2862QemuMutex map_client_list_lock;
2863static QLIST_HEAD(map_client_list, MapClient) map_client_list
2864    = QLIST_HEAD_INITIALIZER(map_client_list);
2865
2866static void cpu_unregister_map_client_do(MapClient *client)
2867{
2868    QLIST_REMOVE(client, link);
2869    g_free(client);
2870}
2871
2872static void cpu_notify_map_clients_locked(void)
2873{
2874    MapClient *client;
2875
2876    while (!QLIST_EMPTY(&map_client_list)) {
2877        client = QLIST_FIRST(&map_client_list);
2878        qemu_bh_schedule(client->bh);
2879        cpu_unregister_map_client_do(client);
2880    }
2881}
2882
2883void cpu_register_map_client(QEMUBH *bh)
2884{
2885    MapClient *client = g_malloc(sizeof(*client));
2886
2887    qemu_mutex_lock(&map_client_list_lock);
2888    client->bh = bh;
2889    QLIST_INSERT_HEAD(&map_client_list, client, link);
2890    if (!atomic_read(&bounce.in_use)) {
2891        cpu_notify_map_clients_locked();
2892    }
2893    qemu_mutex_unlock(&map_client_list_lock);
2894}
2895
2896void cpu_exec_init_all(void)
2897{
2898    qemu_mutex_init(&ram_list.mutex);
2899    io_mem_init();
2900    memory_map_init();
2901    qemu_mutex_init(&map_client_list_lock);
2902}
2903
2904void cpu_unregister_map_client(QEMUBH *bh)
2905{
2906    MapClient *client;
2907
2908    qemu_mutex_lock(&map_client_list_lock);
2909    QLIST_FOREACH(client, &map_client_list, link) {
2910        if (client->bh == bh) {
2911            cpu_unregister_map_client_do(client);
2912            break;
2913        }
2914    }
2915    qemu_mutex_unlock(&map_client_list_lock);
2916}
2917
2918static void cpu_notify_map_clients(void)
2919{
2920    qemu_mutex_lock(&map_client_list_lock);
2921    cpu_notify_map_clients_locked();
2922    qemu_mutex_unlock(&map_client_list_lock);
2923}
2924
2925bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2926{
2927    MemoryRegion *mr;
2928    hwaddr l, xlat;
2929
2930    rcu_read_lock();
2931    while (len > 0) {
2932        l = len;
2933        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2934        if (!memory_access_is_direct(mr, is_write)) {
2935            l = memory_access_size(mr, l, addr);
2936            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                    rcu_read_unlock();   /* do not leak the RCU read lock taken above */
2937                return false;
2938            }
2939        }
2940
2941        len -= l;
2942        addr += l;
2943    }
2944    rcu_read_unlock();
2945    return true;
2946}
2947
2948/* Map a physical memory region into a host virtual address.
2949 * May map a subset of the requested range, given by and returned in *plen.
2950 * May return NULL if resources needed to perform the mapping are exhausted.
2951 * Use only for reads OR writes - not for read-modify-write operations.
2952 * Use cpu_register_map_client() to know when retrying the map operation is
2953 * likely to succeed.
2954 */
2955void *address_space_map(AddressSpace *as,
2956                        hwaddr addr,
2957                        hwaddr *plen,
2958                        bool is_write)
2959{
2960    hwaddr len = *plen;
2961    hwaddr done = 0;
2962    hwaddr l, xlat, base;
2963    MemoryRegion *mr, *this_mr;
2964    ram_addr_t raddr;
2965    void *ptr;
2966
2967    if (len == 0) {
2968        return NULL;
2969    }
2970
2971    l = len;
2972    rcu_read_lock();
2973    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2974
2975    if (!memory_access_is_direct(mr, is_write)) {
2976        if (atomic_xchg(&bounce.in_use, true)) {
2977            rcu_read_unlock();
2978            return NULL;
2979        }
2980        /* Avoid unbounded allocations */
2981        l = MIN(l, TARGET_PAGE_SIZE);
2982        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2983        bounce.addr = addr;
2984        bounce.len = l;
2985
2986        memory_region_ref(mr);
2987        bounce.mr = mr;
2988        if (!is_write) {
2989            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2990                               bounce.buffer, l);
2991        }
2992
2993        rcu_read_unlock();
2994        *plen = l;
2995        return bounce.buffer;
2996    }
2997
2998    base = xlat;
2999    raddr = memory_region_get_ram_addr(mr);
3000
3001    for (;;) {
3002        len -= l;
3003        addr += l;
3004        done += l;
3005        if (len == 0) {
3006            break;
3007        }
3008
3009        l = len;
3010        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3011        if (this_mr != mr || xlat != base + done) {
3012            break;
3013        }
3014    }
3015
3016    memory_region_ref(mr);
3017    *plen = done;
3018    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3019    rcu_read_unlock();
3020
3021    return ptr;
3022}
3023
3024/* Unmaps a memory region previously mapped by address_space_map().
3025 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3026 * the amount of memory that was actually read or written by the caller.
3027 */
3028void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3029                         int is_write, hwaddr access_len)
3030{
3031    if (buffer != bounce.buffer) {
3032        MemoryRegion *mr;
3033        ram_addr_t addr1;
3034
3035        mr = qemu_ram_addr_from_host(buffer, &addr1);
3036        assert(mr != NULL);
3037        if (is_write) {
3038            invalidate_and_set_dirty(mr, addr1, access_len);
3039        }
3040        if (xen_enabled()) {
3041            xen_invalidate_map_cache_entry(buffer);
3042        }
3043        memory_region_unref(mr);
3044        return;
3045    }
3046    if (is_write) {
3047        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3048                            bounce.buffer, access_len);
3049    }
3050    qemu_vfree(bounce.buffer);
3051    bounce.buffer = NULL;
3052    memory_region_unref(bounce.mr);
3053    atomic_mb_set(&bounce.in_use, false);
3054    cpu_notify_map_clients();
3055}
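/*
 * Illustrative sketch (not part of this file): the usual map/access/unmap
 * pattern for DMA-style access.  "gpa" and "want" are placeholders.  *plen
 * may come back smaller than requested, and a NULL return means the single
 * bounce buffer is busy; cpu_register_map_client() can be used to learn
 * when a retry is worthwhile.
 *
 *     hwaddr len = want;
 *     void *p = address_space_map(&address_space_memory, gpa, &len, true);
 *     if (p) {
 *         memset(p, 0, len);
 *         address_space_unmap(&address_space_memory, p, len, true, len);
 *     }
 */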
3056
3057void *cpu_physical_memory_map(hwaddr addr,
3058                              hwaddr *plen,
3059                              int is_write)
3060{
3061    return address_space_map(&address_space_memory, addr, plen, is_write);
3062}
3063
3064void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3065                               int is_write, hwaddr access_len)
3066{
3067    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3068}
3069
3070/* warning: addr must be aligned */
3071static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3072                                                  MemTxAttrs attrs,
3073                                                  MemTxResult *result,
3074                                                  enum device_endian endian)
3075{
3076    uint8_t *ptr;
3077    uint64_t val;
3078    MemoryRegion *mr;
3079    hwaddr l = 4;
3080    hwaddr addr1;
3081    MemTxResult r;
3082    bool release_lock = false;
3083
3084    rcu_read_lock();
3085    mr = address_space_translate(as, addr, &addr1, &l, false);
3086    if (l < 4 || !memory_access_is_direct(mr, false)) {
3087        release_lock |= prepare_mmio_access(mr);
3088
3089        /* I/O case */
3090        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3091#if defined(TARGET_WORDS_BIGENDIAN)
3092        if (endian == DEVICE_LITTLE_ENDIAN) {
3093            val = bswap32(val);
3094        }
3095#else
3096        if (endian == DEVICE_BIG_ENDIAN) {
3097            val = bswap32(val);
3098        }
3099#endif
3100    } else {
3101        /* RAM case */
3102        ptr = qemu_get_ram_ptr(mr->ram_block,
3103                               (memory_region_get_ram_addr(mr)
3104                                & TARGET_PAGE_MASK)
3105                               + addr1);
3106        switch (endian) {
3107        case DEVICE_LITTLE_ENDIAN:
3108            val = ldl_le_p(ptr);
3109            break;
3110        case DEVICE_BIG_ENDIAN:
3111            val = ldl_be_p(ptr);
3112            break;
3113        default:
3114            val = ldl_p(ptr);
3115            break;
3116        }
3117        r = MEMTX_OK;
3118    }
3119    if (result) {
3120        *result = r;
3121    }
3122    if (release_lock) {
3123        qemu_mutex_unlock_iothread();
3124    }
3125    rcu_read_unlock();
3126    return val;
3127}
3128
3129uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3130                           MemTxAttrs attrs, MemTxResult *result)
3131{
3132    return address_space_ldl_internal(as, addr, attrs, result,
3133                                      DEVICE_NATIVE_ENDIAN);
3134}
3135
3136uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3137                              MemTxAttrs attrs, MemTxResult *result)
3138{
3139    return address_space_ldl_internal(as, addr, attrs, result,
3140                                      DEVICE_LITTLE_ENDIAN);
3141}
3142
3143uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3144                              MemTxAttrs attrs, MemTxResult *result)
3145{
3146    return address_space_ldl_internal(as, addr, attrs, result,
3147                                      DEVICE_BIG_ENDIAN);
3148}
3149
3150uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3151{
3152    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3153}
3154
3155uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3156{
3157    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3158}
3159
3160uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3161{
3162    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3163}
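/*
 * Illustrative sketch (not part of this file): the *_phys() wrappers above
 * discard the MemTxResult; a caller that cares about bus errors passes a
 * result pointer instead.  The address is a placeholder.
 *
 *     MemTxResult res;
 *     uint32_t v = address_space_ldl(&address_space_memory, 0x1000,
 *                                    MEMTXATTRS_UNSPECIFIED, &res);
 *     if (res != MEMTX_OK) {
 *         error_report("32-bit load did not complete normally");
 *     }
 */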
3164
3165/* warning: addr must be aligned */
3166static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3167                                                  MemTxAttrs attrs,
3168                                                  MemTxResult *result,
3169                                                  enum device_endian endian)
3170{
3171    uint8_t *ptr;
3172    uint64_t val;
3173    MemoryRegion *mr;
3174    hwaddr l = 8;
3175    hwaddr addr1;
3176    MemTxResult r;
3177    bool release_lock = false;
3178
3179    rcu_read_lock();
3180    mr = address_space_translate(as, addr, &addr1, &l,
3181                                 false);
3182    if (l < 8 || !memory_access_is_direct(mr, false)) {
3183        release_lock |= prepare_mmio_access(mr);
3184
3185        /* I/O case */
3186        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3187#if defined(TARGET_WORDS_BIGENDIAN)
3188        if (endian == DEVICE_LITTLE_ENDIAN) {
3189            val = bswap64(val);
3190        }
3191#else
3192        if (endian == DEVICE_BIG_ENDIAN) {
3193            val = bswap64(val);
3194        }
3195#endif
3196    } else {
3197        /* RAM case */
3198        ptr = qemu_get_ram_ptr(mr->ram_block,
3199                               (memory_region_get_ram_addr(mr)
3200                                & TARGET_PAGE_MASK)
3201                               + addr1);
3202        switch (endian) {
3203        case DEVICE_LITTLE_ENDIAN:
3204            val = ldq_le_p(ptr);
3205            break;
3206        case DEVICE_BIG_ENDIAN:
3207            val = ldq_be_p(ptr);
3208            break;
3209        default:
3210            val = ldq_p(ptr);
3211            break;
3212        }
3213        r = MEMTX_OK;
3214    }
3215    if (result) {
3216        *result = r;
3217    }
3218    if (release_lock) {
3219        qemu_mutex_unlock_iothread();
3220    }
3221    rcu_read_unlock();
3222    return val;
3223}
3224
3225uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3226                           MemTxAttrs attrs, MemTxResult *result)
3227{
3228    return address_space_ldq_internal(as, addr, attrs, result,
3229                                      DEVICE_NATIVE_ENDIAN);
3230}
3231
3232uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3233                           MemTxAttrs attrs, MemTxResult *result)
3234{
3235    return address_space_ldq_internal(as, addr, attrs, result,
3236                                      DEVICE_LITTLE_ENDIAN);
3237}
3238
3239uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3240                           MemTxAttrs attrs, MemTxResult *result)
3241{
3242    return address_space_ldq_internal(as, addr, attrs, result,
3243                                      DEVICE_BIG_ENDIAN);
3244}
3245
3246uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3247{
3248    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3249}
3250
3251uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3252{
3253    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3254}
3255
3256uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3257{
3258    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3259}
3260
3261/* XXX: optimize */
3262uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3263                            MemTxAttrs attrs, MemTxResult *result)
3264{
3265    uint8_t val;
3266    MemTxResult r;
3267
3268    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3269    if (result) {
3270        *result = r;
3271    }
3272    return val;
3273}
3274
3275uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3276{
3277    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3278}
3279
3280/* warning: addr must be aligned */
3281static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3282                                                   hwaddr addr,
3283                                                   MemTxAttrs attrs,
3284                                                   MemTxResult *result,
3285                                                   enum device_endian endian)
3286{
3287    uint8_t *ptr;
3288    uint64_t val;
3289    MemoryRegion *mr;
3290    hwaddr l = 2;
3291    hwaddr addr1;
3292    MemTxResult r;
3293    bool release_lock = false;
3294
3295    rcu_read_lock();
3296    mr = address_space_translate(as, addr, &addr1, &l,
3297                                 false);
3298    if (l < 2 || !memory_access_is_direct(mr, false)) {
3299        release_lock |= prepare_mmio_access(mr);
3300
3301        /* I/O case */
3302        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3303#if defined(TARGET_WORDS_BIGENDIAN)
3304        if (endian == DEVICE_LITTLE_ENDIAN) {
3305            val = bswap16(val);
3306        }
3307#else
3308        if (endian == DEVICE_BIG_ENDIAN) {
3309            val = bswap16(val);
3310        }
3311#endif
3312    } else {
3313        /* RAM case */
3314        ptr = qemu_get_ram_ptr(mr->ram_block,
3315                               (memory_region_get_ram_addr(mr)
3316                                & TARGET_PAGE_MASK)
3317                               + addr1);
3318        switch (endian) {
3319        case DEVICE_LITTLE_ENDIAN:
3320            val = lduw_le_p(ptr);
3321            break;
3322        case DEVICE_BIG_ENDIAN:
3323            val = lduw_be_p(ptr);
3324            break;
3325        default:
3326            val = lduw_p(ptr);
3327            break;
3328        }
3329        r = MEMTX_OK;
3330    }
3331    if (result) {
3332        *result = r;
3333    }
3334    if (release_lock) {
3335        qemu_mutex_unlock_iothread();
3336    }
3337    rcu_read_unlock();
3338    return val;
3339}
3340
3341uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3342                           MemTxAttrs attrs, MemTxResult *result)
3343{
3344    return address_space_lduw_internal(as, addr, attrs, result,
3345                                       DEVICE_NATIVE_ENDIAN);
3346}
3347
3348uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3349                           MemTxAttrs attrs, MemTxResult *result)
3350{
3351    return address_space_lduw_internal(as, addr, attrs, result,
3352                                       DEVICE_LITTLE_ENDIAN);
3353}
3354
3355uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3356                           MemTxAttrs attrs, MemTxResult *result)
3357{
3358    return address_space_lduw_internal(as, addr, attrs, result,
3359                                       DEVICE_BIG_ENDIAN);
3360}
3361
3362uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3363{
3364    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3365}
3366
3367uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3368{
3369    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3370}
3371
3372uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3373{
3374    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3375}
3376
3377/* warning: addr must be aligned. The ram page is not marked as dirty
3378   and the code inside is not invalidated. It is useful if the dirty
3379   bits are used to track modified PTEs */
3380void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3381                                MemTxAttrs attrs, MemTxResult *result)
3382{
3383    uint8_t *ptr;
3384    MemoryRegion *mr;
3385    hwaddr l = 4;
3386    hwaddr addr1;
3387    MemTxResult r;
3388    uint8_t dirty_log_mask;
3389    bool release_lock = false;
3390
3391    rcu_read_lock();
3392    mr = address_space_translate(as, addr, &addr1, &l,
3393                                 true);
3394    if (l < 4 || !memory_access_is_direct(mr, true)) {
3395        release_lock |= prepare_mmio_access(mr);
3396
3397        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3398    } else {
3399        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3400        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3401        stl_p(ptr, val);
3402
3403        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3404        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3405        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3406        r = MEMTX_OK;
3407    }
3408    if (result) {
3409        *result = r;
3410    }
3411    if (release_lock) {
3412        qemu_mutex_unlock_iothread();
3413    }
3414    rcu_read_unlock();
3415}
3416
3417void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3418{
3419    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3420}
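/*
 * Illustrative sketch (not part of this file): stl_phys_notdirty() suits
 * stores that must not mark the page dirty or invalidate translated code,
 * e.g. a target's page-table walker updating an accessed bit in a guest
 * PTE.  The PTE layout below is hypothetical.
 *
 *     uint32_t pte = ldl_phys(as, pte_addr);
 *     if (!(pte & 0x20)) {
 *         stl_phys_notdirty(as, pte_addr, pte | 0x20);
 *     }
 */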
3421
3422/* warning: addr must be aligned */
3423static inline void address_space_stl_internal(AddressSpace *as,
3424                                              hwaddr addr, uint32_t val,
3425                                              MemTxAttrs attrs,
3426                                              MemTxResult *result,
3427                                              enum device_endian endian)
3428{
3429    uint8_t *ptr;
3430    MemoryRegion *mr;
3431    hwaddr l = 4;
3432    hwaddr addr1;
3433    MemTxResult r;
3434    bool release_lock = false;
3435
3436    rcu_read_lock();
3437    mr = address_space_translate(as, addr, &addr1, &l,
3438                                 true);
3439    if (l < 4 || !memory_access_is_direct(mr, true)) {
3440        release_lock |= prepare_mmio_access(mr);
3441
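        /*
         * The MMIO dispatch path takes the value in target-native order, so
         * byte swap when the requested endianness differs from the target's.
         */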
3442#if defined(TARGET_WORDS_BIGENDIAN)
3443        if (endian == DEVICE_LITTLE_ENDIAN) {
3444            val = bswap32(val);
3445        }
3446#else
3447        if (endian == DEVICE_BIG_ENDIAN) {
3448            val = bswap32(val);
3449        }
3450#endif
3451        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3452    } else {
3453        /* RAM case */
3454        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3455        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3456        switch (endian) {
3457        case DEVICE_LITTLE_ENDIAN:
3458            stl_le_p(ptr, val);
3459            break;
3460        case DEVICE_BIG_ENDIAN:
3461            stl_be_p(ptr, val);
3462            break;
3463        default:
3464            stl_p(ptr, val);
3465            break;
3466        }
3467        invalidate_and_set_dirty(mr, addr1, 4);
3468        r = MEMTX_OK;
3469    }
3470    if (result) {
3471        *result = r;
3472    }
3473    if (release_lock) {
3474        qemu_mutex_unlock_iothread();
3475    }
3476    rcu_read_unlock();
3477}
3478
3479void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3480                       MemTxAttrs attrs, MemTxResult *result)
3481{
3482    address_space_stl_internal(as, addr, val, attrs, result,
3483                               DEVICE_NATIVE_ENDIAN);
3484}
3485
3486void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3487                       MemTxAttrs attrs, MemTxResult *result)
3488{
3489    address_space_stl_internal(as, addr, val, attrs, result,
3490                               DEVICE_LITTLE_ENDIAN);
3491}
3492
3493void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3494                       MemTxAttrs attrs, MemTxResult *result)
3495{
3496    address_space_stl_internal(as, addr, val, attrs, result,
3497                               DEVICE_BIG_ENDIAN);
3498}
3499
3500void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3501{
3502    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3503}
3504
3505void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3506{
3507    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3508}
3509
3510void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3511{
3512    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3513}
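
/*
 * Illustrative sketch (not part of the original file): unlike the legacy
 * st*_phys wrappers, the address_space_* variants can report transaction
 * failures through MemTxResult.  The doorbell register address here is
 * hypothetical.
 */
static inline bool example_ring_doorbell(AddressSpace *as, hwaddr doorbell,
                                         uint32_t val)
{
    MemTxResult res;

    address_space_stl_le(as, doorbell, val, MEMTXATTRS_UNSPECIFIED, &res);
    return res == MEMTX_OK;
}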
3514
3515/* XXX: optimize - this goes through the generic address_space_rw() path */
3516void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3517                       MemTxAttrs attrs, MemTxResult *result)
3518{
3519    uint8_t v = val;
3520    MemTxResult r;
3521
3522    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3523    if (result) {
3524        *result = r;
3525    }
3526}
3527
3528void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3529{
3530    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3531}
3532
3533/* warning: addr must be aligned */
3534static inline void address_space_stw_internal(AddressSpace *as,
3535                                              hwaddr addr, uint32_t val,
3536                                              MemTxAttrs attrs,
3537                                              MemTxResult *result,
3538                                              enum device_endian endian)
3539{
3540    uint8_t *ptr;
3541    MemoryRegion *mr;
3542    hwaddr l = 2;
3543    hwaddr addr1;
3544    MemTxResult r;
3545    bool release_lock = false;
3546
3547    rcu_read_lock();
3548    mr = address_space_translate(as, addr, &addr1, &l, true);
3549    if (l < 2 || !memory_access_is_direct(mr, true)) {
3550        release_lock |= prepare_mmio_access(mr);
3551
3552#if defined(TARGET_WORDS_BIGENDIAN)
3553        if (endian == DEVICE_LITTLE_ENDIAN) {
3554            val = bswap16(val);
3555        }
3556#else
3557        if (endian == DEVICE_BIG_ENDIAN) {
3558            val = bswap16(val);
3559        }
3560#endif
3561        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3562    } else {
3563        /* RAM case */
3564        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3565        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3566        switch (endian) {
3567        case DEVICE_LITTLE_ENDIAN:
3568            stw_le_p(ptr, val);
3569            break;
3570        case DEVICE_BIG_ENDIAN:
3571            stw_be_p(ptr, val);
3572            break;
3573        default:
3574            stw_p(ptr, val);
3575            break;
3576        }
3577        invalidate_and_set_dirty(mr, addr1, 2);
3578        r = MEMTX_OK;
3579    }
3580    if (result) {
3581        *result = r;
3582    }
3583    if (release_lock) {
3584        qemu_mutex_unlock_iothread();
3585    }
3586    rcu_read_unlock();
3587}
3588
3589void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3590                       MemTxAttrs attrs, MemTxResult *result)
3591{
3592    address_space_stw_internal(as, addr, val, attrs, result,
3593                               DEVICE_NATIVE_ENDIAN);
3594}
3595
3596void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3597                       MemTxAttrs attrs, MemTxResult *result)
3598{
3599    address_space_stw_internal(as, addr, val, attrs, result,
3600                               DEVICE_LITTLE_ENDIAN);
3601}
3602
3603void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3604                       MemTxAttrs attrs, MemTxResult *result)
3605{
3606    address_space_stw_internal(as, addr, val, attrs, result,
3607                               DEVICE_BIG_ENDIAN);
3608}
3609
3610void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3611{
3612    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3613}
3614
3615void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3616{
3617    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3618}
3619
3620void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3621{
3622    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3623}
3624
3625/* XXX: optimize - this goes through the generic address_space_rw() path */
3626void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3627                       MemTxAttrs attrs, MemTxResult *result)
3628{
3629    MemTxResult r;
3630    val = tswap64(val);
3631    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3632    if (result) {
3633        *result = r;
3634    }
3635}
3636
3637void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3638                       MemTxAttrs attrs, MemTxResult *result)
3639{
3640    MemTxResult r;
3641    val = cpu_to_le64(val);
3642    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3643    if (result) {
3644        *result = r;
3645    }
3646}

3647void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3648                       MemTxAttrs attrs, MemTxResult *result)
3649{
3650    MemTxResult r;
3651    val = cpu_to_be64(val);
3652    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3653    if (result) {
3654        *result = r;
3655    }
3656}
3657
3658void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3659{
3660    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3661}
3662
3663void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3664{
3665    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3666}
3667
3668void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3669{
3670    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3671}
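
/*
 * Illustrative sketch (not part of the original file): storing a 64-bit
 * little-endian value, e.g. a hypothetical queue base address register,
 * with the helpers above.
 */
static inline void example_write_queue_base(AddressSpace *as, hwaddr reg,
                                            uint64_t base)
{
    /* stq_le_phys() swaps to little endian and discards any MemTxResult. */
    stq_le_phys(as, reg, base);
}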
3672
3673/* virtual memory access for debug (includes writing to ROM) */
3674int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3675                        uint8_t *buf, int len, int is_write)
3676{
3677    int l;
3678    hwaddr phys_addr;
3679    target_ulong page;
3680
3681    while (len > 0) {
3682        int asidx;
3683        MemTxAttrs attrs;
3684
3685        page = addr & TARGET_PAGE_MASK;
3686        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3687        asidx = cpu_asidx_from_attrs(cpu, attrs);
3688        /* if no physical page is mapped, return an error */
3689        if (phys_addr == -1) {
3690            return -1;
            }
3691        l = (page + TARGET_PAGE_SIZE) - addr;
3692        if (l > len) {
3693            l = len;
            }
3694        phys_addr += (addr & ~TARGET_PAGE_MASK);
3695        if (is_write) {
3696            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3697                                          phys_addr, buf, l);
3698        } else {
3699            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3700                             MEMTXATTRS_UNSPECIFIED,
3701                             buf, l, 0);
3702        }
3703        len -= l;
3704        buf += l;
3705        addr += l;
3706    }
3707    return 0;
3708}
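
/*
 * Illustrative sketch (not part of the original file): a debugger front end
 * reads guest virtual memory through cpu_memory_rw_debug(), much as the
 * gdbstub does.  The wrapper name is hypothetical.
 */
static inline int example_debug_read(CPUState *cpu, target_ulong vaddr,
                                     uint8_t *buf, int len)
{
    /* is_write == 0: read through the CPU's current MMU mappings. */
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0);
}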
3709
3710/*
3711 * Allows code that needs to deal with migration bitmaps, etc., to still be
3712 * built target-independent.
3713 */
3714size_t qemu_target_page_bits(void)
3715{
3716    return TARGET_PAGE_BITS;
3717}
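
/*
 * Illustrative sketch (not part of the original file): target-independent
 * code can derive how many target pages a byte length spans from
 * qemu_target_page_bits().
 */
static inline size_t example_bytes_to_target_pages(size_t bytes)
{
    size_t page_size = (size_t)1 << qemu_target_page_bits();

    return (bytes + page_size - 1) / page_size;
}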
3718
3719#endif
3720
3721/*
3722 * A helper function for the _utterly broken_ virtio device model to find
3723 * out if it's running on a big-endian machine. Don't do this at home, kids!
3724 */
3725bool target_words_bigendian(void);
3726bool target_words_bigendian(void)
3727{
3728#if defined(TARGET_WORDS_BIGENDIAN)
3729    return true;
3730#else
3731    return false;
3732#endif
3733}
3734
3735#ifndef CONFIG_USER_ONLY
3736bool cpu_physical_memory_is_io(hwaddr phys_addr)
3737{
3738    MemoryRegion *mr;
3739    hwaddr l = 1;
3740    bool res;
3741
3742    rcu_read_lock();
3743    mr = address_space_translate(&address_space_memory,
3744                                 phys_addr, &phys_addr, &l, false);
3745
3746    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3747    rcu_read_unlock();
3748    return res;
3749}
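
/*
 * Illustrative sketch (not part of the original file): a caller could use
 * cpu_physical_memory_is_io() to refuse an operation that only makes sense
 * on plain RAM.  The "patching" operation itself is hypothetical.
 */
static inline bool example_can_patch_in_place(hwaddr paddr)
{
    /* MMIO (anything that is neither RAM nor ROMD) must not be patched. */
    return !cpu_physical_memory_is_io(paddr);
}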
3750
3751int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3752{
3753    RAMBlock *block;
3754    int ret = 0;
3755
3756    rcu_read_lock();
3757    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3758        ret = func(block->idstr, block->host, block->offset,
3759                   block->used_length, opaque);
3760        if (ret) {
3761            break;
3762        }
3763    }
3764    rcu_read_unlock();
3765    return ret;
3766}
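
/*
 * Illustrative sketch (not part of the original file): a callback whose
 * parameters mirror the call qemu_ram_foreach_block() makes above, used to
 * sum the used length of every RAM block.  The accumulator passed through
 * 'opaque' is a choice made here, not part of the API.
 */
static inline int example_sum_block_lengths(const char *block_name,
                                            void *host_addr, ram_addr_t offset,
                                            ram_addr_t length, void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0; /* returning non-zero would stop the iteration early */
}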
3767#endif
3768