qemu/exec.c
<<
>>
Prefs
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "qapi/error.h"
  21#ifndef _WIN32
  22#include <sys/mman.h>
  23#endif
  24
  25#include "qemu/cutils.h"
  26#include "cpu.h"
  27#include "tcg.h"
  28#include "hw/hw.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#endif
  32#include "hw/qdev.h"
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "hw/xen/xen.h"
  36#include "qemu/timer.h"
  37#include "qemu/config-file.h"
  38#include "qemu/error-report.h"
  39#include "exec/memory.h"
  40#include "sysemu/dma.h"
  41#include "exec/address-spaces.h"
  42#if defined(CONFIG_USER_ONLY)
  43#include <qemu.h>
  44#else /* !CONFIG_USER_ONLY */
  45#include "sysemu/xen-mapcache.h"
  46#include "trace.h"
  47#endif
  48#include "exec/cpu-all.h"
  49#include "qemu/rcu_queue.h"
  50#include "qemu/main-loop.h"
  51#include "translate-all.h"
  52#include "sysemu/replay.h"
  53
  54#include "exec/memory-internal.h"
  55#include "exec/ram_addr.h"
  56#include "exec/log.h"
  57
  58#include "qemu/range.h"
  59#ifndef _WIN32
  60#include "qemu/mmap-alloc.h"
  61#endif
  62
  63//#define DEBUG_SUBPAGE
  64
  65#if !defined(CONFIG_USER_ONLY)
  66/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  67 * are protected by the ramlist lock.
  68 */
  69RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  70
  71static MemoryRegion *system_memory;
  72static MemoryRegion *system_io;
  73
  74AddressSpace address_space_io;
  75AddressSpace address_space_memory;
  76
  77MemoryRegion io_mem_rom, io_mem_notdirty;
  78static MemoryRegion io_mem_unassigned;
  79
  80/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  81#define RAM_PREALLOC   (1 << 0)
  82
  83/* RAM is mmap-ed with MAP_SHARED */
  84#define RAM_SHARED     (1 << 1)
  85
  86/* Only a portion of RAM (used_length) is actually used, and migrated.
  87 * This used_length size can change across reboots.
  88 */
  89#define RAM_RESIZEABLE (1 << 2)
  90
  91/* RAM is backed by an mmapped file.
  92 */
  93#define RAM_FILE (1 << 3)
  94#endif
  95
  96struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  97/* current CPU in the current thread. It is only valid inside
  98   cpu_exec() */
  99__thread CPUState *current_cpu;
 100/* 0 = Do not count executed instructions.
 101   1 = Precise instruction counting.
 102   2 = Adaptive rate instruction counting.  */
 103int use_icount;
 104
 105#if !defined(CONFIG_USER_ONLY)
 106
 107typedef struct PhysPageEntry PhysPageEntry;
 108
 109struct PhysPageEntry {
 110    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
 111    uint32_t skip : 6;
 112     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 113    uint32_t ptr : 26;
 114};
 115
 116#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 117
 118/* Size of the L2 (and L3, etc) page tables.  */
 119#define ADDR_SPACE_BITS 64
 120
 121#define P_L2_BITS 9
 122#define P_L2_SIZE (1 << P_L2_BITS)
 123
 124#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 125
 126typedef PhysPageEntry Node[P_L2_SIZE];
 127
 128typedef struct PhysPageMap {
 129    struct rcu_head rcu;
 130
 131    unsigned sections_nb;
 132    unsigned sections_nb_alloc;
 133    unsigned nodes_nb;
 134    unsigned nodes_nb_alloc;
 135    Node *nodes;
 136    MemoryRegionSection *sections;
 137} PhysPageMap;
 138
 139struct AddressSpaceDispatch {
 140    struct rcu_head rcu;
 141
 142    MemoryRegionSection *mru_section;
 143    /* This is a multi-level map on the physical address space.
 144     * The bottom level has pointers to MemoryRegionSections.
 145     */
 146    PhysPageEntry phys_map;
 147    PhysPageMap map;
 148    AddressSpace *as;
 149};
 150
 151#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 152typedef struct subpage_t {
 153    MemoryRegion iomem;
 154    AddressSpace *as;
 155    hwaddr base;
 156    uint16_t sub_section[TARGET_PAGE_SIZE];
 157} subpage_t;
 158
 159#define PHYS_SECTION_UNASSIGNED 0
 160#define PHYS_SECTION_NOTDIRTY 1
 161#define PHYS_SECTION_ROM 2
 162#define PHYS_SECTION_WATCH 3
 163
 164static void io_mem_init(void);
 165static void memory_map_init(void);
 166static void tcg_commit(MemoryListener *listener);
 167
 168static MemoryRegion io_mem_watch;
 169
 170/**
 171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 172 * @cpu: the CPU whose AddressSpace this is
 173 * @as: the AddressSpace itself
 174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 176 */
 177struct CPUAddressSpace {
 178    CPUState *cpu;
 179    AddressSpace *as;
 180    struct AddressSpaceDispatch *memory_dispatch;
 181    MemoryListener tcg_as_listener;
 182};
 183
 184#endif
 185
 186#if !defined(CONFIG_USER_ONLY)
 187
 188static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 189{
 190    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 191        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 192        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 193        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 194    }
 195}
 196
 197static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 198{
 199    unsigned i;
 200    uint32_t ret;
 201    PhysPageEntry e;
 202    PhysPageEntry *p;
 203
 204    ret = map->nodes_nb++;
 205    p = map->nodes[ret];
 206    assert(ret != PHYS_MAP_NODE_NIL);
 207    assert(ret != map->nodes_nb_alloc);
 208
 209    e.skip = leaf ? 0 : 1;
 210    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 211    for (i = 0; i < P_L2_SIZE; ++i) {
 212        memcpy(&p[i], &e, sizeof(e));
 213    }
 214    return ret;
 215}
 216
 217static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 218                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 219                                int level)
 220{
 221    PhysPageEntry *p;
 222    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 223
 224    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 225        lp->ptr = phys_map_node_alloc(map, level == 0);
 226    }
 227    p = map->nodes[lp->ptr];
 228    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 229
 230    while (*nb && lp < &p[P_L2_SIZE]) {
 231        if ((*index & (step - 1)) == 0 && *nb >= step) {
 232            lp->skip = 0;
 233            lp->ptr = leaf;
 234            *index += step;
 235            *nb -= step;
 236        } else {
 237            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 238        }
 239        ++lp;
 240    }
 241}
 242
 243static void phys_page_set(AddressSpaceDispatch *d,
 244                          hwaddr index, hwaddr nb,
 245                          uint16_t leaf)
 246{
 247    /* Wildly overreserve - it doesn't matter much. */
 248    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 249
 250    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 251}
 252
 253/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 254 * and update our entry so we can skip it and go directly to the destination.
 255 */
 256static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 257{
 258    unsigned valid_ptr = P_L2_SIZE;
 259    int valid = 0;
 260    PhysPageEntry *p;
 261    int i;
 262
 263    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 264        return;
 265    }
 266
 267    p = nodes[lp->ptr];
 268    for (i = 0; i < P_L2_SIZE; i++) {
 269        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 270            continue;
 271        }
 272
 273        valid_ptr = i;
 274        valid++;
 275        if (p[i].skip) {
 276            phys_page_compact(&p[i], nodes, compacted);
 277        }
 278    }
 279
 280    /* We can only compress if there's only one child. */
 281    if (valid != 1) {
 282        return;
 283    }
 284
 285    assert(valid_ptr < P_L2_SIZE);
 286
 287    /* Don't compress if it won't fit in the # of bits we have. */
 288    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 289        return;
 290    }
 291
 292    lp->ptr = p[valid_ptr].ptr;
 293    if (!p[valid_ptr].skip) {
 294        /* If our only child is a leaf, make this a leaf. */
 295        /* By design, we should have made this node a leaf to begin with so we
 296         * should never reach here.
 297         * But since it's so simple to handle this, let's do it just in case we
 298         * change this rule.
 299         */
 300        lp->skip = 0;
 301    } else {
 302        lp->skip += p[valid_ptr].skip;
 303    }
 304}
 305
 306static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 307{
 308    DECLARE_BITMAP(compacted, nodes_nb);
 309
 310    if (d->phys_map.skip) {
 311        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 312    }
 313}
 314
 315static inline bool section_covers_addr(const MemoryRegionSection *section,
 316                                       hwaddr addr)
 317{
 318    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 319     * the section must cover the entire address space.
 320     */
 321    return section->size.hi ||
 322           range_covers_byte(section->offset_within_address_space,
 323                             section->size.lo, addr);
 324}
 325
 326static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 327                                           Node *nodes, MemoryRegionSection *sections)
 328{
 329    PhysPageEntry *p;
 330    hwaddr index = addr >> TARGET_PAGE_BITS;
 331    int i;
 332
 333    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 334        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 335            return &sections[PHYS_SECTION_UNASSIGNED];
 336        }
 337        p = nodes[lp.ptr];
 338        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 339    }
 340
 341    if (section_covers_addr(&sections[lp.ptr], addr)) {
 342        return &sections[lp.ptr];
 343    } else {
 344        return &sections[PHYS_SECTION_UNASSIGNED];
 345    }
 346}
 347
 348bool memory_region_is_unassigned(MemoryRegion *mr)
 349{
 350    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 351        && mr != &io_mem_watch;
 352}
 353
 354/* Called from RCU critical section */
 355static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 356                                                        hwaddr addr,
 357                                                        bool resolve_subpage)
 358{
 359    MemoryRegionSection *section = atomic_read(&d->mru_section);
 360    subpage_t *subpage;
 361    bool update;
 362
 363    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 364        section_covers_addr(section, addr)) {
 365        update = false;
 366    } else {
 367        section = phys_page_find(d->phys_map, addr, d->map.nodes,
 368                                 d->map.sections);
 369        update = true;
 370    }
 371    if (resolve_subpage && section->mr->subpage) {
 372        subpage = container_of(section->mr, subpage_t, iomem);
 373        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 374    }
 375    if (update) {
 376        atomic_set(&d->mru_section, section);
 377    }
 378    return section;
 379}
 380
 381/* Called from RCU critical section */
 382static MemoryRegionSection *
 383address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 384                                 hwaddr *plen, bool resolve_subpage)
 385{
 386    MemoryRegionSection *section;
 387    MemoryRegion *mr;
 388    Int128 diff;
 389
 390    section = address_space_lookup_region(d, addr, resolve_subpage);
 391    /* Compute offset within MemoryRegionSection */
 392    addr -= section->offset_within_address_space;
 393
 394    /* Compute offset within MemoryRegion */
 395    *xlat = addr + section->offset_within_region;
 396
 397    mr = section->mr;
 398
 399    /* MMIO registers can be expected to perform full-width accesses based only
 400     * on their address, without considering adjacent registers that could
 401     * decode to completely different MemoryRegions.  When such registers
 402     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 403     * regions overlap wildly.  For this reason we cannot clamp the accesses
 404     * here.
 405     *
 406     * If the length is small (as is the case for address_space_ldl/stl),
 407     * everything works fine.  If the incoming length is large, however,
 408     * the caller really has to do the clamping through memory_access_size.
 409     */
 410    if (memory_region_is_ram(mr)) {
 411        diff = int128_sub(section->size, int128_make64(addr));
 412        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 413    }
 414    return section;
 415}
 416
 417MemoryRegion *address_space_translate_attr(AddressSpace *as, hwaddr addr,
 418                                           hwaddr *xlat, hwaddr *plen,
 419                                           bool is_write,
 420                                           MemTxAttrs *attr)
 421{
 422    IOMMUTLBEntry iotlb;
 423    MemoryRegionSection *section;
 424    MemoryRegion *mr;
 425
 426    for (;;) {
 427        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 428        section = address_space_translate_internal(d, addr, &addr, plen, true);
 429        mr = section->mr;
 430
 431        if (!mr->iommu_ops) {
 432            break;
 433        }
 434
 435        if (mr->iommu_ops->translate_attr) {
 436            iotlb = mr->iommu_ops->translate_attr(mr, addr, is_write, attr);
 437        } else {
 438            iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 439        }
 440
 441        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 442                | (addr & iotlb.addr_mask));
 443        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 444        if (!(iotlb.perm & (1 << is_write))) {
 445            mr = &io_mem_unassigned;
 446            break;
 447        }
 448
 449        as = iotlb.target_as;
 450    }
 451
 452    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 453        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 454        *plen = MIN(page, *plen);
 455    }
 456
 457    *xlat = addr;
 458    return mr;
 459}
 460
 461MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 462                                      hwaddr *xlat, hwaddr *plen,
 463                                      bool is_write)
 464{
 465    MemTxAttrs attr = MEMTXATTRS_UNSPECIFIED;
 466    return address_space_translate_attr(as, addr, xlat, plen, is_write,
 467                                        &attr);
 468}
 469
 470/* Called from RCU critical section */
 471MemoryRegionSection *
 472address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 473                                  hwaddr *xlat, hwaddr *plen, int *prot,
 474                                  MemTxAttrs *attr)
 475{
 476    MemoryRegionSection *section;
 477    AddressSpace *as = cpu->cpu_ases[asidx].memory_dispatch->as;
 478
 479    IOMMUTLBEntry iotlb;
 480    struct {
 481        MemoryRegionSection *section;
 482        hwaddr addr;
 483        hwaddr len;
 484    } root =  { .section = NULL, .addr = addr};
 485    AddressSpace *orig_as = as;
 486    MemoryRegion *mr;
 487    hwaddr len = *plen;
 488
 489    assert(prot);
 490
 491    while (1) {
 492        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 493        section = address_space_translate_internal(d, addr, &addr, plen, false);
 494        mr = section->mr;
 495
 496        if (!mr->iommu_ops) {
 497            break;
 498        }
 499
 500        /* FIXME: these are not necessarily accesses, so is_write doesn't make
 501           sense!  */
 502        if (mr->iommu_ops->translate_attr) {
 503            iotlb = mr->iommu_ops->translate_attr(mr, addr, false, attr);
 504        } else {
 505            iotlb = mr->iommu_ops->translate(mr, addr, false);
 506        }
 507        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 508                | (addr & iotlb.addr_mask));
 509        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
 510        as = iotlb.target_as;
 511
 512        if (!root.section && orig_as != as) {
 513            root.section = section;
 514            root.len = *plen;
 515        }
 516    }
 517
 518    *plen = len;
 519    *xlat = addr;
 520
 521    /* If the IOMMU translated addr into IO in a different AS, refer to
 522     * the IOMMU itself and do a slow translated access at access time.
 523     * TODO: If the iotlb could record dst AS, this wouldn't be needed.
 524     */
 525    if (!memory_region_is_ram(section->mr) && as != orig_as) {
 526        *plen = root.len;
 527        *xlat = root.addr;
 528        section = root.section;
 529    }
 530//    qemu_log("as=%p mr=%p addr=%lx len=%lx\n", as, section->mr, *xlat, *plen);
 531    return section;
 532}
 533#endif
 534
 535#if !defined(CONFIG_USER_ONLY)
 536
 537static int cpu_common_post_load(void *opaque, int version_id)
 538{
 539    CPUState *cpu = opaque;
 540
 541    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 542       version_id is increased. */
 543    cpu->interrupt_request &= ~0x01;
 544    tlb_flush(cpu, 1);
 545
 546    return 0;
 547}
 548
 549static int cpu_common_pre_load(void *opaque)
 550{
 551    CPUState *cpu = opaque;
 552
 553    cpu->exception_index = -1;
 554
 555    return 0;
 556}
 557
 558static bool cpu_common_exception_index_needed(void *opaque)
 559{
 560    CPUState *cpu = opaque;
 561
 562    return tcg_enabled() && cpu->exception_index != -1;
 563}
 564
 565static const VMStateDescription vmstate_cpu_common_exception_index = {
 566    .name = "cpu_common/exception_index",
 567    .version_id = 1,
 568    .minimum_version_id = 1,
 569    .needed = cpu_common_exception_index_needed,
 570    .fields = (VMStateField[]) {
 571        VMSTATE_INT32(exception_index, CPUState),
 572        VMSTATE_END_OF_LIST()
 573    }
 574};
 575
 576static bool cpu_common_crash_occurred_needed(void *opaque)
 577{
 578    CPUState *cpu = opaque;
 579
 580    return cpu->crash_occurred;
 581}
 582
 583static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 584    .name = "cpu_common/crash_occurred",
 585    .version_id = 1,
 586    .minimum_version_id = 1,
 587    .needed = cpu_common_crash_occurred_needed,
 588    .fields = (VMStateField[]) {
 589        VMSTATE_BOOL(crash_occurred, CPUState),
 590        VMSTATE_END_OF_LIST()
 591    }
 592};
 593
 594const VMStateDescription vmstate_cpu_common = {
 595    .name = "cpu_common",
 596    .version_id = 1,
 597    .minimum_version_id = 1,
 598    .pre_load = cpu_common_pre_load,
 599    .post_load = cpu_common_post_load,
 600    .fields = (VMStateField[]) {
 601        VMSTATE_UINT32(halted, CPUState),
 602        VMSTATE_UINT32(interrupt_request, CPUState),
 603        VMSTATE_END_OF_LIST()
 604    },
 605    .subsections = (const VMStateDescription*[]) {
 606        &vmstate_cpu_common_exception_index,
 607        &vmstate_cpu_common_crash_occurred,
 608        NULL
 609    }
 610};
 611
 612#endif
 613
 614CPUState *qemu_get_cpu(int index)
 615{
 616    CPUState *cpu;
 617
 618    CPU_FOREACH(cpu) {
 619        if (cpu->cpu_index == index) {
 620            return cpu;
 621        }
 622    }
 623
 624    return NULL;
 625}
 626
 627#if !defined(CONFIG_USER_ONLY)
 628void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 629{
 630    CPUAddressSpace *newas;
 631
 632    /* Target code should have set num_ases before calling us */
 633    assert(asidx < cpu->num_ases);
 634
 635    if (asidx == 0) {
 636        /* address space 0 gets the convenience alias */
 637        cpu->as = as;
 638    }
 639
 640    /* KVM cannot currently support multiple address spaces. */
 641    assert(asidx == 0 || !kvm_enabled());
 642
 643    if (!cpu->cpu_ases) {
 644        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 645    }
 646
 647    newas = &cpu->cpu_ases[asidx];
 648    newas->cpu = cpu;
 649    newas->as = as;
 650    if (tcg_enabled()) {
 651        newas->tcg_as_listener.commit = tcg_commit;
 652        memory_listener_register(&newas->tcg_as_listener, as);
 653    }
 654}
 655
 656AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 657{
 658    /* Return the AddressSpace corresponding to the specified index */
 659    return cpu->cpu_ases[asidx].as;
 660}
 661#endif
 662
 663#ifndef CONFIG_USER_ONLY
 664static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 665
 666static int cpu_get_free_index(Error **errp)
 667{
 668    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 669
 670    if (cpu >= MAX_CPUMASK_BITS) {
 671        error_setg(errp, "Trying to use more CPUs than max of %d",
 672                   MAX_CPUMASK_BITS);
 673        return -1;
 674    }
 675
 676    bitmap_set(cpu_index_map, cpu, 1);
 677    return cpu;
 678}
 679
 680void cpu_exec_exit(CPUState *cpu)
 681{
 682    if (cpu->cpu_index == -1) {
 683        /* cpu_index was never allocated by this @cpu or was already freed. */
 684        return;
 685    }
 686
 687    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 688    cpu->cpu_index = -1;
 689}
 690#else
 691
 692static int cpu_get_free_index(Error **errp)
 693{
 694    CPUState *some_cpu;
 695    int cpu_index = 0;
 696
 697    CPU_FOREACH(some_cpu) {
 698        cpu_index++;
 699    }
 700    return cpu_index;
 701}
 702
 703void cpu_exec_exit(CPUState *cpu)
 704{
 705}
 706#endif
 707
 708void cpu_exec_init(CPUState *cpu, Error **errp)
 709{
 710    CPUClass *cc = CPU_GET_CLASS(cpu);
 711    int cpu_index;
 712    Error *local_err = NULL;
 713
 714    cpu->as = NULL;
 715    cpu->num_ases = 0;
 716
 717#ifndef CONFIG_USER_ONLY
 718    cpu->thread_id = qemu_get_thread_id();
 719
 720    /* This is a softmmu CPU object, so create a property for it
 721     * so users can wire up its memory. (This can't go in qom/cpu.c
 722     * because that file is compiled only once for both user-mode
 723     * and system builds.) The default if no link is set up is to use
 724     * the system address space.
 725     */
 726    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 727                             (Object **)&cpu->memory,
 728                             qdev_prop_allow_set_link_before_realize,
 729                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
 730                             &error_abort);
 731    cpu->memory = system_memory;
 732    object_ref(OBJECT(cpu->memory));
 733#endif
 734
 735#if defined(CONFIG_USER_ONLY)
 736    cpu_list_lock();
 737#endif
 738    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
 739    if (local_err) {
 740        error_propagate(errp, local_err);
 741#if defined(CONFIG_USER_ONLY)
 742        cpu_list_unlock();
 743#endif
 744        return;
 745    }
 746    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 747#if defined(CONFIG_USER_ONLY)
 748    cpu_list_unlock();
 749#endif
 750    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 751        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 752    }
 753    if (cc->vmsd != NULL) {
 754        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 755    }
 756}
 757
 758#if defined(CONFIG_USER_ONLY)
 759static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 760{
 761    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 762}
 763#else
 764static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 765{
 766    MemTxAttrs attrs;
 767    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
 768    int asidx = cpu_asidx_from_attrs(cpu, attrs);
 769    if (phys != -1) {
 770        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
 771                                phys | (pc & ~TARGET_PAGE_MASK));
 772    }
 773}
 774#endif
 775
 776#if defined(CONFIG_USER_ONLY)
 777void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 778
 779{
 780}
 781
 782int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 783                          int flags)
 784{
 785    return -ENOSYS;
 786}
 787
 788void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 789{
 790}
 791
 792int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 793                          int flags, CPUWatchpoint **watchpoint)
 794{
 795    return -ENOSYS;
 796}
 797#else
 798/* Add a watchpoint.  */
 799int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 800                          int flags, CPUWatchpoint **watchpoint)
 801{
 802    CPUWatchpoint *wp;
 803
 804    /* forbid ranges which are empty or run off the end of the address space */
 805    if (len == 0 || (addr + len - 1) < addr) {
 806        error_report("tried to set invalid watchpoint at %"
 807                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 808        return -EINVAL;
 809    }
 810    wp = g_malloc(sizeof(*wp));
 811
 812    wp->vaddr = addr;
 813    wp->len = len;
 814    wp->flags = flags;
 815
 816    /* keep all GDB-injected watchpoints in front */
 817    if (flags & BP_GDB) {
 818        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 819    } else {
 820        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 821    }
 822
 823    tlb_flush_page(cpu, addr);
 824
 825    if (watchpoint)
 826        *watchpoint = wp;
 827    return 0;
 828}
 829
 830/* Remove a specific watchpoint.  */
 831int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 832                          int flags)
 833{
 834    CPUWatchpoint *wp;
 835
 836    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 837        if (addr == wp->vaddr && len == wp->len
 838                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 839            cpu_watchpoint_remove_by_ref(cpu, wp);
 840            return 0;
 841        }
 842    }
 843    return -ENOENT;
 844}
 845
 846/* Remove a specific watchpoint by reference.  */
 847void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 848{
 849    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 850
 851    tlb_flush_page(cpu, watchpoint->vaddr);
 852
 853    g_free(watchpoint);
 854}
 855
 856/* Remove all matching watchpoints.  */
 857void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 858{
 859    CPUWatchpoint *wp, *next;
 860
 861    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 862        if (wp->flags & mask) {
 863            cpu_watchpoint_remove_by_ref(cpu, wp);
 864        }
 865    }
 866}
 867
 868/* Return true if this watchpoint address matches the specified
 869 * access (ie the address range covered by the watchpoint overlaps
 870 * partially or completely with the address range covered by the
 871 * access).
 872 */
 873static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 874                                                  vaddr addr,
 875                                                  vaddr len)
 876{
 877    /* We know the lengths are non-zero, but a little caution is
 878     * required to avoid errors in the case where the range ends
 879     * exactly at the top of the address space and so addr + len
 880     * wraps round to zero.
 881     */
 882    vaddr wpend = wp->vaddr + wp->len - 1;
 883    vaddr addrend = addr + len - 1;
 884
 885    return !(addr > wpend || wp->vaddr > addrend);
 886}
 887
 888#endif
 889
 890/* Add a breakpoint.  */
 891int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 892                          CPUBreakpoint **breakpoint)
 893{
 894    CPUBreakpoint *bp;
 895
 896    bp = g_malloc(sizeof(*bp));
 897
 898    bp->pc = pc;
 899    bp->flags = flags;
 900
 901    /* keep all GDB-injected breakpoints in front */
 902    if (flags & BP_GDB) {
 903        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 904    } else {
 905        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 906    }
 907
 908    breakpoint_invalidate(cpu, pc);
 909
 910    if (breakpoint) {
 911        *breakpoint = bp;
 912    }
 913    return 0;
 914}
 915
 916/* Remove a specific breakpoint.  */
 917int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 918{
 919    CPUBreakpoint *bp;
 920
 921    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 922        if (bp->pc == pc && bp->flags == flags) {
 923            cpu_breakpoint_remove_by_ref(cpu, bp);
 924            return 0;
 925        }
 926    }
 927    return -ENOENT;
 928}
 929
 930/* Remove a specific breakpoint by reference.  */
 931void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 932{
 933    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 934
 935    breakpoint_invalidate(cpu, breakpoint->pc);
 936
 937    g_free(breakpoint);
 938}
 939
 940/* Remove all matching breakpoints. */
 941void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 942{
 943    CPUBreakpoint *bp, *next;
 944
 945    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 946        if (bp->flags & mask) {
 947            cpu_breakpoint_remove_by_ref(cpu, bp);
 948        }
 949    }
 950}
 951
 952/* enable or disable single step mode. EXCP_DEBUG is returned by the
 953   CPU loop after each instruction */
 954void cpu_single_step(CPUState *cpu, int enabled)
 955{
 956    if (cpu->singlestep_enabled != enabled) {
 957        cpu->singlestep_enabled = enabled;
 958        if (kvm_enabled()) {
 959            kvm_update_guest_debug(cpu, 0);
 960        } else {
 961            /* must flush all the translated code to avoid inconsistencies */
 962            /* XXX: only flush what is necessary */
 963            tb_flush(cpu);
 964        }
 965    }
 966}
 967
 968void cpu_abort(CPUState *cpu, const char *fmt, ...)
 969{
 970    va_list ap;
 971    va_list ap2;
 972
 973    va_start(ap, fmt);
 974    va_copy(ap2, ap);
 975    fprintf(stderr, "qemu: fatal: ");
 976    vfprintf(stderr, fmt, ap);
 977    fprintf(stderr, "\n");
 978    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 979    if (qemu_log_separate()) {
 980        qemu_log("qemu: fatal: ");
 981        qemu_log_vprintf(fmt, ap2);
 982        qemu_log("\n");
 983        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 984        qemu_log_flush();
 985        qemu_log_close();
 986    }
 987    va_end(ap2);
 988    va_end(ap);
 989    replay_finish();
 990#if defined(CONFIG_USER_ONLY)
 991    {
 992        struct sigaction act;
 993        sigfillset(&act.sa_mask);
 994        act.sa_handler = SIG_DFL;
 995        sigaction(SIGABRT, &act, NULL);
 996    }
 997#endif
 998    abort();
 999}
1000
1001#if !defined(CONFIG_USER_ONLY)
1002/* Called from RCU critical section */
1003static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
1004{
1005    RAMBlock *block;
1006
1007    block = atomic_rcu_read(&ram_list.mru_block);
1008    if (block && addr - block->offset < block->max_length) {
1009        return block;
1010    }
1011    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1012        if (addr - block->offset < block->max_length) {
1013            goto found;
1014        }
1015    }
1016
1017    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1018    abort();
1019
1020found:
1021    /* It is safe to write mru_block outside the iothread lock.  This
1022     * is what happens:
1023     *
1024     *     mru_block = xxx
1025     *     rcu_read_unlock()
1026     *                                        xxx removed from list
1027     *                  rcu_read_lock()
1028     *                  read mru_block
1029     *                                        mru_block = NULL;
1030     *                                        call_rcu(reclaim_ramblock, xxx);
1031     *                  rcu_read_unlock()
1032     *
1033     * atomic_rcu_set is not needed here.  The block was already published
1034     * when it was placed into the list.  Here we're just making an extra
1035     * copy of the pointer.
1036     */
1037    ram_list.mru_block = block;
1038    return block;
1039}
1040
1041static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1042{
1043    CPUState *cpu;
1044    ram_addr_t start1;
1045    RAMBlock *block;
1046    ram_addr_t end;
1047
1048    end = TARGET_PAGE_ALIGN(start + length);
1049    start &= TARGET_PAGE_MASK;
1050
1051    rcu_read_lock();
1052    block = qemu_get_ram_block(start);
1053    assert(block == qemu_get_ram_block(end - 1));
1054    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1055    CPU_FOREACH(cpu) {
1056        tlb_reset_dirty(cpu, start1, length);
1057    }
1058    rcu_read_unlock();
1059}
1060
1061/* Note: start and end must be within the same ram block.  */
1062bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1063                                              ram_addr_t length,
1064                                              unsigned client)
1065{
1066    DirtyMemoryBlocks *blocks;
1067    unsigned long end, page;
1068    bool dirty = false;
1069
1070    if (length == 0) {
1071        return false;
1072    }
1073
1074    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1075    page = start >> TARGET_PAGE_BITS;
1076
1077    rcu_read_lock();
1078
1079    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1080
1081    while (page < end) {
1082        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1083        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1084        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1085
1086        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1087                                              offset, num);
1088        page += num;
1089    }
1090
1091    rcu_read_unlock();
1092
1093    if (dirty && tcg_enabled()) {
1094        tlb_reset_dirty_range_all(start, length);
1095    }
1096
1097    return dirty;
1098}
1099
1100/* Called from RCU critical section */
1101hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1102                                       MemoryRegionSection *section,
1103                                       target_ulong vaddr,
1104                                       hwaddr paddr, hwaddr xlat,
1105                                       int prot,
1106                                       target_ulong *address)
1107{
1108    hwaddr iotlb;
1109    CPUWatchpoint *wp;
1110
1111    if (memory_region_is_ram(section->mr)) {
1112        /* Normal RAM.  */
1113        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1114        if (!section->readonly) {
1115            iotlb |= PHYS_SECTION_NOTDIRTY;
1116        } else {
1117            iotlb |= PHYS_SECTION_ROM;
1118        }
1119    } else {
1120        AddressSpaceDispatch *d;
1121
1122        d = atomic_rcu_read(&section->address_space->dispatch);
1123        iotlb = section - d->map.sections;
1124        iotlb += xlat;
1125    }
1126
1127    /* Make accesses to pages with watchpoints go via the
1128       watchpoint trap routines.  */
1129    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1130        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1131            /* Avoid trapping reads of pages with a write breakpoint. */
1132            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1133                iotlb = PHYS_SECTION_WATCH + paddr;
1134                *address |= TLB_MMIO;
1135                break;
1136            }
1137        }
1138    }
1139
1140    return iotlb;
1141}
1142#endif /* defined(CONFIG_USER_ONLY) */
1143
1144#if !defined(CONFIG_USER_ONLY)
1145
1146static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1147                             uint16_t section);
1148static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1149
1150static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1151                               qemu_anon_ram_alloc;
1152
1153/*
1154 * Set a custom physical guest memory alloator.
1155 * Accelerators with unusual needs may need this.  Hopefully, we can
1156 * get rid of it eventually.
1157 */
1158void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1159{
1160    phys_mem_alloc = alloc;
1161}
1162
1163static uint16_t phys_section_add(PhysPageMap *map,
1164                                 MemoryRegionSection *section)
1165{
1166    /* The physical section number is ORed with a page-aligned
1167     * pointer to produce the iotlb entries.  Thus it should
1168     * never overflow into the page-aligned value.
1169     */
1170    assert(map->sections_nb < TARGET_PAGE_SIZE);
1171
1172    if (map->sections_nb == map->sections_nb_alloc) {
1173        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1174        map->sections = g_renew(MemoryRegionSection, map->sections,
1175                                map->sections_nb_alloc);
1176    }
1177    map->sections[map->sections_nb] = *section;
1178    memory_region_ref(section->mr);
1179    return map->sections_nb++;
1180}
1181
1182static void phys_section_destroy(MemoryRegion *mr)
1183{
1184    bool have_sub_page = mr->subpage;
1185
1186    memory_region_unref(mr);
1187
1188    if (have_sub_page) {
1189        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1190        object_unref(OBJECT(&subpage->iomem));
1191        g_free(subpage);
1192    }
1193}
1194
1195static void phys_sections_free(PhysPageMap *map)
1196{
1197    while (map->sections_nb > 0) {
1198        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1199        phys_section_destroy(section->mr);
1200    }
1201    g_free(map->sections);
1202    g_free(map->nodes);
1203}
1204
1205static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1206{
1207    subpage_t *subpage;
1208    hwaddr base = section->offset_within_address_space
1209        & TARGET_PAGE_MASK;
1210    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1211                                                   d->map.nodes, d->map.sections);
1212    MemoryRegionSection subsection = {
1213        .offset_within_address_space = base,
1214        .size = int128_make64(TARGET_PAGE_SIZE),
1215    };
1216    hwaddr start, end;
1217
1218    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1219
1220    if (!(existing->mr->subpage)) {
1221        subpage = subpage_init(d->as, base);
1222        subsection.address_space = d->as;
1223        subsection.mr = &subpage->iomem;
1224        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1225                      phys_section_add(&d->map, &subsection));
1226    } else {
1227        subpage = container_of(existing->mr, subpage_t, iomem);
1228    }
1229    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1230    end = start + int128_get64(section->size) - 1;
1231    subpage_register(subpage, start, end,
1232                     phys_section_add(&d->map, section));
1233}
1234
1235
1236static void register_multipage(AddressSpaceDispatch *d,
1237                               MemoryRegionSection *section)
1238{
1239    hwaddr start_addr = section->offset_within_address_space;
1240    uint16_t section_index = phys_section_add(&d->map, section);
1241    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1242                                                    TARGET_PAGE_BITS));
1243
1244    assert(num_pages);
1245    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1246}
1247
1248static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1249{
1250    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1251    AddressSpaceDispatch *d = as->next_dispatch;
1252    MemoryRegionSection now = *section, remain = *section;
1253    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1254
1255    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1256        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1257                       - now.offset_within_address_space;
1258
1259        now.size = int128_min(int128_make64(left), now.size);
1260        register_subpage(d, &now);
1261    } else {
1262        now.size = int128_zero();
1263    }
1264    while (int128_ne(remain.size, now.size)) {
1265        remain.size = int128_sub(remain.size, now.size);
1266        remain.offset_within_address_space += int128_get64(now.size);
1267        remain.offset_within_region += int128_get64(now.size);
1268        now = remain;
1269        if (int128_lt(remain.size, page_size)) {
1270            register_subpage(d, &now);
1271        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1272            now.size = page_size;
1273            register_subpage(d, &now);
1274        } else {
1275            now.size = int128_and(now.size, int128_neg(page_size));
1276            register_multipage(d, &now);
1277        }
1278    }
1279}
1280
1281void qemu_flush_coalesced_mmio_buffer(void)
1282{
1283    if (kvm_enabled())
1284        kvm_flush_coalesced_mmio_buffer();
1285}
1286
1287void qemu_mutex_lock_ramlist(void)
1288{
1289    qemu_mutex_lock(&ram_list.mutex);
1290}
1291
1292void qemu_mutex_unlock_ramlist(void)
1293{
1294    qemu_mutex_unlock(&ram_list.mutex);
1295}
1296
1297#ifdef __linux__
1298static void *file_ram_alloc(RAMBlock *block,
1299                            ram_addr_t memory,
1300                            const char *path,
1301                            Error **errp)
1302{
1303    bool unlink_on_error = false;
1304    char *filename;
1305    char *sanitized_name;
1306    char *c;
1307    void *area;
1308    int fd = -1;
1309    int64_t page_size;
1310
1311    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1312        error_setg(errp,
1313                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1314        return NULL;
1315    }
1316
1317    for (;;) {
1318        fd = open(path, O_RDWR);
1319        if (fd >= 0) {
1320            /* @path names an existing file, use it */
1321            break;
1322        }
1323        if (errno == ENOENT) {
1324            /* @path names a file that doesn't exist, create it */
1325            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1326            if (fd >= 0) {
1327                unlink_on_error = true;
1328                break;
1329            }
1330        } else if (errno == EISDIR) {
1331            /* @path names a directory, create a file there */
1332            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1333            sanitized_name = g_strdup(memory_region_name(block->mr));
1334            for (c = sanitized_name; *c != '\0'; c++) {
1335                if (*c == '/') {
1336                    *c = '_';
1337                }
1338            }
1339
1340            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1341                                       sanitized_name);
1342            g_free(sanitized_name);
1343
1344            fd = mkstemp(filename);
1345            if (fd >= 0) {
1346                unlink(filename);
1347                g_free(filename);
1348                break;
1349            }
1350            g_free(filename);
1351        }
1352        if (errno != EEXIST && errno != EINTR) {
1353            error_setg_errno(errp, errno,
1354                             "can't open backing store %s for guest RAM",
1355                             path);
1356            goto error;
1357        }
1358        /*
1359         * Try again on EINTR and EEXIST.  The latter happens when
1360         * something else creates the file between our two open().
1361         */
1362    }
1363
1364    page_size = qemu_fd_getpagesize(fd);
1365    block->mr->align = page_size;
1366
1367    if (memory < page_size) {
1368        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1369                   "or larger than page size 0x%" PRIx64,
1370                   memory, page_size);
1371        goto error;
1372    }
1373
1374    memory = ROUND_UP(memory, page_size);
1375
1376    /*
1377     * ftruncate is not supported by hugetlbfs in older
1378     * hosts, so don't bother bailing out on errors.
1379     * If anything goes wrong with it under other filesystems,
1380     * mmap will fail.
1381     */
1382    if (ftruncate(fd, memory)) {
1383        perror("ftruncate");
1384    }
1385
1386    area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
1387    if (area == MAP_FAILED) {
1388        error_setg_errno(errp, errno,
1389                         "unable to map backing store for guest RAM");
1390        goto error;
1391    }
1392
1393    if (mem_prealloc) {
1394        os_mem_prealloc(fd, area, memory);
1395    }
1396
1397    block->fd = fd;
1398    return area;
1399
1400error:
1401    if (unlink_on_error) {
1402        unlink(path);
1403    }
1404    if (fd != -1) {
1405        close(fd);
1406    }
1407    return NULL;
1408}
1409#endif
1410
1411/* Called with the ramlist lock held.  */
1412static ram_addr_t find_ram_offset(ram_addr_t size)
1413{
1414    RAMBlock *block, *next_block;
1415    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1416
1417    assert(size != 0); /* it would hand out same offset multiple times */
1418
1419    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1420        return 0;
1421    }
1422
1423    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1424        ram_addr_t end, next = RAM_ADDR_MAX;
1425
1426        end = block->offset + block->max_length;
1427
1428        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1429            if (next_block->offset >= end) {
1430                next = MIN(next, next_block->offset);
1431            }
1432        }
1433        if (next - end >= size && next - end < mingap) {
1434            offset = end;
1435            mingap = next - end;
1436        }
1437    }
1438
1439    if (offset == RAM_ADDR_MAX) {
1440        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1441                (uint64_t)size);
1442        abort();
1443    }
1444
1445    return offset;
1446}
1447
1448ram_addr_t last_ram_offset(void)
1449{
1450    RAMBlock *block;
1451    ram_addr_t last = 0;
1452
1453    rcu_read_lock();
1454    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1455        last = MAX(last, block->offset + block->max_length);
1456    }
1457    rcu_read_unlock();
1458    return last;
1459}
1460
1461static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1462{
1463    int ret;
1464
1465    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1466    if (!machine_dump_guest_core(current_machine)) {
1467        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1468        if (ret) {
1469            perror("qemu_madvise");
1470            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1471                            "but dump_guest_core=off specified\n");
1472        }
1473    }
1474}
1475
1476/* Called within an RCU critical section, or while the ramlist lock
1477 * is held.
1478 */
1479static RAMBlock *find_ram_block(ram_addr_t addr)
1480{
1481    RAMBlock *block;
1482
1483    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1484        if (block->offset == addr) {
1485            return block;
1486        }
1487    }
1488
1489    return NULL;
1490}
1491
1492const char *qemu_ram_get_idstr(RAMBlock *rb)
1493{
1494    return rb->idstr;
1495}
1496
1497/* Called with iothread lock held.  */
1498void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1499{
1500    RAMBlock *new_block, *block;
1501
1502    rcu_read_lock();
1503    new_block = find_ram_block(addr);
1504    assert(new_block);
1505    assert(!new_block->idstr[0]);
1506
1507    if (dev) {
1508        char *id = qdev_get_dev_path(dev);
1509        if (id) {
1510            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1511            g_free(id);
1512        }
1513    }
1514    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1515
1516    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1517        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1518            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1519                    new_block->idstr);
1520            abort();
1521        }
1522    }
1523    rcu_read_unlock();
1524}
1525
1526/* Called with iothread lock held.  */
1527void qemu_ram_unset_idstr(ram_addr_t addr)
1528{
1529    RAMBlock *block;
1530
1531    /* FIXME: arch_init.c assumes that this is not called throughout
1532     * migration.  Ignore the problem since hot-unplug during migration
1533     * does not work anyway.
1534     */
1535
1536    rcu_read_lock();
1537    block = find_ram_block(addr);
1538    if (block) {
1539        memset(block->idstr, 0, sizeof(block->idstr));
1540    }
1541    rcu_read_unlock();
1542}
1543
1544static int memory_try_enable_merging(void *addr, size_t len)
1545{
1546    if (!machine_mem_merge(current_machine)) {
1547        /* disabled by the user */
1548        return 0;
1549    }
1550
1551    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1552}
1553
1554/* Only legal before guest might have detected the memory size: e.g. on
1555 * incoming migration, or right after reset.
1556 *
1557 * As memory core doesn't know how is memory accessed, it is up to
1558 * resize callback to update device state and/or add assertions to detect
1559 * misuse, if necessary.
1560 */
1561int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1562{
1563    RAMBlock *block = find_ram_block(base);
1564
1565    assert(block);
1566
1567    newsize = HOST_PAGE_ALIGN(newsize);
1568
1569    if (block->used_length == newsize) {
1570        return 0;
1571    }
1572
1573    if (!(block->flags & RAM_RESIZEABLE)) {
1574        error_setg_errno(errp, EINVAL,
1575                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1576                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1577                         newsize, block->used_length);
1578        return -EINVAL;
1579    }
1580
1581    if (block->max_length < newsize) {
1582        error_setg_errno(errp, EINVAL,
1583                         "Length too large: %s: 0x" RAM_ADDR_FMT
1584                         " > 0x" RAM_ADDR_FMT, block->idstr,
1585                         newsize, block->max_length);
1586        return -EINVAL;
1587    }
1588
1589    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1590    block->used_length = newsize;
1591    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1592                                        DIRTY_CLIENTS_ALL);
1593    memory_region_set_size(block->mr, newsize);
1594    if (block->resized) {
1595        block->resized(block->idstr, newsize, block->host);
1596    }
1597    return 0;
1598}
1599
1600/* Called with ram_list.mutex held */
1601static void dirty_memory_extend(ram_addr_t old_ram_size,
1602                                ram_addr_t new_ram_size)
1603{
1604    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1605                                             DIRTY_MEMORY_BLOCK_SIZE);
1606    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1607                                             DIRTY_MEMORY_BLOCK_SIZE);
1608    int i;
1609
1610    /* Only need to extend if block count increased */
1611    if (new_num_blocks <= old_num_blocks) {
1612        return;
1613    }
1614
1615    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1616        DirtyMemoryBlocks *old_blocks;
1617        DirtyMemoryBlocks *new_blocks;
1618        int j;
1619
1620        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1621        new_blocks = g_malloc(sizeof(*new_blocks) +
1622                              sizeof(new_blocks->blocks[0]) * new_num_blocks);
1623
1624        if (old_num_blocks) {
1625            memcpy(new_blocks->blocks, old_blocks->blocks,
1626                   old_num_blocks * sizeof(old_blocks->blocks[0]));
1627        }
1628
1629        for (j = old_num_blocks; j < new_num_blocks; j++) {
1630            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1631        }
1632
1633        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1634
1635        if (old_blocks) {
1636            g_free_rcu(old_blocks, rcu);
1637        }
1638    }
1639}
1640
1641static void ram_block_add(RAMBlock *new_block, Error **errp)
1642{
1643    RAMBlock *block;
1644    RAMBlock *last_block = NULL;
1645    ram_addr_t old_ram_size, new_ram_size;
1646    Error *err = NULL;
1647
1648    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1649
1650    qemu_mutex_lock_ramlist();
1651    new_block->offset = find_ram_offset(new_block->max_length);
1652
1653    if (!new_block->host) {
1654        if (xen_enabled()) {
1655            xen_ram_alloc(new_block->offset, new_block->max_length,
1656                          new_block->mr, &err);
1657            if (err) {
1658                error_propagate(errp, err);
1659                qemu_mutex_unlock_ramlist();
1660                return;
1661            }
1662        } else {
1663            new_block->host = phys_mem_alloc(new_block->max_length,
1664                                             &new_block->mr->align);
1665            if (!new_block->host) {
1666                error_setg_errno(errp, errno,
1667                                 "cannot set up guest memory '%s'",
1668                                 memory_region_name(new_block->mr));
1669                qemu_mutex_unlock_ramlist();
1670                return;
1671            }
1672            memory_try_enable_merging(new_block->host, new_block->max_length);
1673        }
1674    }
1675
1676    new_ram_size = MAX(old_ram_size,
1677              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1678    if (new_ram_size > old_ram_size) {
1679        migration_bitmap_extend(old_ram_size, new_ram_size);
1680        dirty_memory_extend(old_ram_size, new_ram_size);
1681    }
1682    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1683     * QLIST (which has an RCU-friendly variant) does not have insertion at
1684     * tail, so save the last element in last_block.
1685     */
1686    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1687        last_block = block;
1688        if (block->max_length < new_block->max_length) {
1689            break;
1690        }
1691    }
1692    if (block) {
1693        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1694    } else if (last_block) {
1695        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1696    } else { /* list is empty */
1697        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1698    }
1699    ram_list.mru_block = NULL;
1700
1701    /* Write list before version */
1702    smp_wmb();
1703    ram_list.version++;
1704    qemu_mutex_unlock_ramlist();
1705
1706    cpu_physical_memory_set_dirty_range(new_block->offset,
1707                                        new_block->used_length,
1708                                        DIRTY_CLIENTS_ALL);
1709
1710    if (new_block->host) {
1711        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1712        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1713        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1714        if (kvm_enabled()) {
1715            kvm_setup_guest_memory(new_block->host, new_block->max_length);
1716        }
1717    }
1718}
1719
1720#ifdef __linux__
1721RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1722                                   bool share, const char *mem_path,
1723                                   Error **errp)
1724{
1725    RAMBlock *new_block;
1726    Error *local_err = NULL;
1727
1728    if (xen_enabled()) {
1729        error_setg(errp, "-mem-path not supported with Xen");
1730        return NULL;
1731    }
1732
1733    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1734        /*
1735         * file_ram_alloc() needs to allocate just like
1736         * phys_mem_alloc, but we haven't bothered to provide
1737         * a hook there.
1738         */
1739        error_setg(errp,
1740                   "-mem-path not supported with this accelerator");
1741        return NULL;
1742    }
1743
1744    size = HOST_PAGE_ALIGN(size);
1745    new_block = g_malloc0(sizeof(*new_block));
1746    new_block->mr = mr;
1747    new_block->used_length = size;
1748    new_block->max_length = size;
1749    new_block->flags = share ? RAM_SHARED : 0;
1750    new_block->flags |= RAM_FILE;
1751    new_block->host = file_ram_alloc(new_block, size,
1752                                     mem_path, errp);
1753    if (!new_block->host) {
1754        g_free(new_block);
1755        return NULL;
1756    }
1757
1758    ram_block_add(new_block, &local_err);
1759    if (local_err) {
1760        g_free(new_block);
1761        error_propagate(errp, local_err);
1762        return NULL;
1763    }
1764    return new_block;
1765}
1766#elif defined(_WIN32)

1767RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1768                                   bool share, const char *mem_path,
1769                                   Error **errp)
1770{
1771    return qemu_ram_alloc(size, mr, errp);
1772}
1773#endif
1774
1775static
1776RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1777                                  void (*resized)(const char*,
1778                                                  uint64_t length,
1779                                                  void *host),
1780                                  void *host, bool resizeable,
1781                                  MemoryRegion *mr, Error **errp)
1782{
1783    RAMBlock *new_block;
1784    Error *local_err = NULL;
1785
1786    size = HOST_PAGE_ALIGN(size);
1787    max_size = HOST_PAGE_ALIGN(max_size);
1788    new_block = g_malloc0(sizeof(*new_block));
1789    new_block->mr = mr;
1790    new_block->resized = resized;
1791    new_block->used_length = size;
1792    new_block->max_length = max_size;
1793    assert(max_size >= size);
1794    new_block->fd = -1;
1795    new_block->host = host;
1796    if (host) {
1797        new_block->flags |= RAM_PREALLOC;
1798    }
1799    if (resizeable) {
1800        new_block->flags |= RAM_RESIZEABLE;
1801    }
1802    ram_block_add(new_block, &local_err);
1803    if (local_err) {
1804        g_free(new_block);
1805        error_propagate(errp, local_err);
1806        return NULL;
1807    }
1808    return new_block;
1809}
1810
1811RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1812                                   MemoryRegion *mr, Error **errp)
1813{
1814    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1815}
1816
1817RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1818{
1819    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1820}
1821
1822RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1823                                     void (*resized)(const char*,
1824                                                     uint64_t length,
1825                                                     void *host),
1826                                     MemoryRegion *mr, Error **errp)
1827{
1828    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1829}
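    /*
     * Board and device code does not normally call the qemu_ram_alloc_*()
     * family directly; it goes through the memory API, which ends up here.
     * A minimal sketch (the MemoryRegion "ram", owner "obj" and "ram_size"
     * are the caller's, not defined in this file):
     *
     *     memory_region_init_ram(&ram, obj, "board.ram", ram_size,
     *                            &error_fatal);
     *     memory_region_add_subregion(get_system_memory(), 0, &ram);
     *
     * memory_region_init_ram() calls qemu_ram_alloc(), which in turn calls
     * ram_block_add() above.
     */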
1830
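    /*
     * reclaim_ramblock runs as an RCU callback scheduled by qemu_ram_free()
     * below, once no reader can still hold a reference to the block, and
     * releases the host memory according to how it was allocated.
     */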
1831static void reclaim_ramblock(RAMBlock *block)
1832{
1833    if (block->flags & RAM_PREALLOC) {
1834        ;
1835    } else if (xen_enabled()) {
1836        xen_invalidate_map_cache_entry(block->host);
1837#ifndef _WIN32
1838    } else if (block->fd >= 0) {
1839        qemu_ram_munmap(block->host, block->max_length);
1840        close(block->fd);
1841#endif
1842    } else {
1843        qemu_anon_ram_free(block->host, block->max_length);
1844    }
1845    g_free(block);
1846}
1847
1848void qemu_ram_free(RAMBlock *block)
1849{
1850    if (!block) {
1851        return;
1852    }
1853
1854    qemu_mutex_lock_ramlist();
1855    QLIST_REMOVE_RCU(block, next);
1856    ram_list.mru_block = NULL;
1857    /* Write list before version */
1858    smp_wmb();
1859    ram_list.version++;
1860    call_rcu(block, reclaim_ramblock, rcu);
1861    qemu_mutex_unlock_ramlist();
1862}
1863
1864#ifndef _WIN32
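    /*
     * Re-establish the host mapping for guest RAM at [addr, addr + length).
     * This is used, for example, by the KVM machine-check handling path to
     * replace a page the host marked as poisoned (that caller lives outside
     * this file and is mentioned here only as context).
     */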
1865void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1866{
1867    RAMBlock *block;
1868    ram_addr_t offset;
1869    int flags;
1870    void *area, *vaddr;
1871
1872    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1873        offset = addr - block->offset;
1874        if (offset < block->max_length) {
1875            vaddr = ramblock_ptr(block, offset);
1876            if (block->flags & RAM_PREALLOC) {
1877                ;
1878            } else if (xen_enabled()) {
1879                abort();
1880            } else {
1881                flags = MAP_FIXED;
1882                if (block->fd >= 0) {
1883                    flags |= (block->flags & RAM_SHARED ?
1884                              MAP_SHARED : MAP_PRIVATE);
1885                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1886                                flags, block->fd, offset);
1887                } else {
1888                    /*
1889                     * Remap needs to match alloc.  Accelerators that
1890                     * set phys_mem_alloc never remap.  If they did,
1891                     * we'd need a remap hook here.
1892                     */
1893                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1894
1895                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1896                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1897                                flags, -1, 0);
1898                }
1899                if (area != vaddr) {
1900                    fprintf(stderr, "Could not remap addr: "
1901                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1902                            length, addr);
1903                    exit(1);
1904                }
1905                memory_try_enable_merging(vaddr, length);
1906                qemu_ram_setup_dump(vaddr, length);
1907            }
1908        }
1909    }
1910}
1911#endif /* !_WIN32 */
1912
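    /*
     * The accessors below look up the RAMBlock containing @addr under the
     * RCU read lock and return (or update) per-block state: the backing
     * file descriptor and the host pointer of the block's start.
     */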
1913int qemu_get_ram_fd(ram_addr_t addr)
1914{
1915    RAMBlock *block;
1916    int fd;
1917
1918    rcu_read_lock();
1919    block = qemu_get_ram_block(addr);
1920    fd = block->fd;
1921    rcu_read_unlock();
1922    return fd;
1923}
1924
1925void qemu_set_ram_fd(ram_addr_t addr, int fd)
1926{
1927    RAMBlock *block;
1928
1929    rcu_read_lock();
1930    block = qemu_get_ram_block(addr);
1931    block->fd = fd;
1932    rcu_read_unlock();
1933}
1934
1935void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1936{
1937    RAMBlock *block;
1938    void *ptr;
1939
1940    rcu_read_lock();
1941    block = qemu_get_ram_block(addr);
1942    ptr = ramblock_ptr(block, 0);
1943    rcu_read_unlock();
1944    return ptr;
1945}
1946
1947/* Return a host pointer to ram allocated with qemu_ram_alloc.
1948 * This should not be used for general purpose DMA.  Use address_space_map
1949 * or address_space_rw instead. For local memory (e.g. video ram) that the
1950 * device owns, use memory_region_get_ram_ptr.
1951 *
1952 * Called within RCU critical section.
1953 */
1954void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1955{
1956    RAMBlock *block = ram_block;
1957
1958    if (block == NULL) {
1959        block = qemu_get_ram_block(addr);
1960    }
1961
1962    if (xen_enabled() && block->host == NULL) {
1963        /* We need to check if the requested address is in RAM
1964         * because we don't want to map the entire memory in QEMU.
1965         * In that case, just map until the end of the page.
1966         */
1967        if (block->offset == 0) {
1968            return xen_map_cache(addr, 0, 0);
1969        }
1970
1971        block->host = xen_map_cache(block->offset, block->max_length, 1);
1972    }
1973    return ramblock_ptr(block, addr - block->offset);
1974}
1975
1976/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1977 * but takes a size argument.
1978 *
1979 * Called within RCU critical section.
1980 */
1981static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1982                                 hwaddr *size)
1983{
1984    RAMBlock *block = ram_block;
1985    ram_addr_t offset_inside_block;
1986    if (*size == 0) {
1987        return NULL;
1988    }
1989
1990    if (block == NULL) {
1991        block = qemu_get_ram_block(addr);
1992    }
1993    offset_inside_block = addr - block->offset;
1994    *size = MIN(*size, block->max_length - offset_inside_block);
1995
1996    if (xen_enabled() && block->host == NULL) {
1997        /* We need to check if the requested address is in RAM
1998         * because we don't want to map the entire memory in QEMU.
1999         * In that case, just map the requested area.
2000         */
2001        if (block->offset == 0) {
2002            return xen_map_cache(addr, *size, 1);
2003        }
2004
2005        block->host = xen_map_cache(block->offset, block->max_length, 1);
2006    }
2007
2008    return ramblock_ptr(block, offset_inside_block);
2009}
2010
2011/*
2012 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
2013 * in that RAMBlock.
2014 *
2015 * ptr: Host pointer to look up
2016 * round_offset: If true, round the result offset down to a page boundary
2017 * *ram_addr: set to result ram_addr
2018 * *offset: set to result offset within the RAMBlock
2019 *
2020 * Returns: RAMBlock (or NULL if not found)
2021 *
2022 * By the time this function returns, the returned pointer is not protected
2023 * by RCU anymore.  If the caller is not within an RCU critical section and
2024 * does not hold the iothread lock, it must have other means of protecting the
2025 * pointer, such as a reference to the region that includes the incoming
2026 * ram_addr_t.
2027 */
2028RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
2029                                   ram_addr_t *ram_addr,
2030                                   ram_addr_t *offset)
2031{
2032    RAMBlock *block;
2033    uint8_t *host = ptr;
2034
2035    if (xen_enabled()) {
2036        rcu_read_lock();
2037        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2038        block = qemu_get_ram_block(*ram_addr);
2039        if (block) {
2040            *offset = (host - block->host);
2041        }
2042        rcu_read_unlock();
2043        return block;
2044    }
2045
2046    rcu_read_lock();
2047    block = atomic_rcu_read(&ram_list.mru_block);
2048    if (block && block->host && host - block->host < block->max_length) {
2049        goto found;
2050    }
2051
2052    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2053        /* This case occurs when the block is not mapped. */
2054        if (block->host == NULL) {
2055            continue;
2056        }
2057        if (host - block->host < block->max_length) {
2058            goto found;
2059        }
2060    }
2061
2062    rcu_read_unlock();
2063    return NULL;
2064
2065found:
2066    *offset = (host - block->host);
2067    if (round_offset) {
2068        *offset &= TARGET_PAGE_MASK;
2069    }
2070    *ram_addr = block->offset + *offset;
2071    rcu_read_unlock();
2072    return block;
2073}
2074
2075/*
2076 * Finds the named RAMBlock
2077 *
2078 * name: The name of RAMBlock to find
2079 *
2080 * Returns: RAMBlock (or NULL if not found)
2081 */
2082RAMBlock *qemu_ram_block_by_name(const char *name)
2083{
2084    RAMBlock *block;
2085
2086    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2087        if (!strcmp(name, block->idstr)) {
2088            return block;
2089        }
2090    }
2091
2092    return NULL;
2093}
2094
2095/* Some of the softmmu routines need to translate from a host pointer
2096   (typically a TLB entry) back to a ram offset.  */
2097MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2098{
2099    RAMBlock *block;
2100    ram_addr_t offset; /* Not used */
2101
2102    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2103
2104    if (!block) {
2105        return NULL;
2106    }
2107
2108    return block->mr;
2109}
2110
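    /*
     * io_mem_notdirty is installed in the TLB for RAM pages that still have
     * dirty-tracking clients (code translation, VGA, migration), so writes
     * are trapped here: the write invalidates any TBs built from the page,
     * is forwarded to RAM, and sets the dirty bits.  Once the page is no
     * longer clean for any client, the TLB entry is switched back to a
     * direct RAM mapping via tlb_set_dirty().
     */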
2111/* Called within RCU critical section.  */
2112static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2113                               uint64_t val, unsigned size)
2114{
2115    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2116        tb_invalidate_phys_page_fast(ram_addr, size);
2117    }
2118    switch (size) {
2119    case 1:
2120        stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2121        break;
2122    case 2:
2123        stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2124        break;
2125    case 4:
2126        stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2127        break;
2128    default:
2129        abort();
2130    }
2131    /* Set both VGA and migration bits for simplicity and to remove
2132     * the notdirty callback faster.
2133     */
2134    cpu_physical_memory_set_dirty_range(ram_addr, size,
2135                                        DIRTY_CLIENTS_NOCODE);
2136    /* we remove the notdirty callback only if the code has been
2137       flushed */
2138    if (!cpu_physical_memory_is_clean(ram_addr)) {
2139        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2140    }
2141}
2142
2143static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2144                                 unsigned size, bool is_write)
2145{
2146    return is_write;
2147}
2148
2149static const MemoryRegionOps notdirty_mem_ops = {
2150    .write = notdirty_mem_write,
2151    .valid.accepts = notdirty_mem_accepts,
2152    .endianness = DEVICE_NATIVE_ENDIAN,
2153};
2154
2155/* Generate a debug exception if a watchpoint has been hit.  */
2156static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2157{
2158    CPUState *cpu = current_cpu;
2159    CPUClass *cc = CPU_GET_CLASS(cpu);
2160    CPUArchState *env = cpu->env_ptr;
2161    target_ulong pc, cs_base;
2162    target_ulong vaddr;
2163    CPUWatchpoint *wp;
2164    int cpu_flags;
2165
2166    if (cpu->watchpoint_hit) {
2167        /* We re-entered the check after replacing the TB. Now raise
2168         * the debug interrupt so that it will trigger after the
2169         * current instruction. */
2170        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2171        return;
2172    }
2173    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2174    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2175        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2176            && (wp->flags & flags)) {
2177            if (flags == BP_MEM_READ) {
2178                wp->flags |= BP_WATCHPOINT_HIT_READ;
2179            } else {
2180                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2181            }
2182            wp->hitaddr = vaddr;
2183            wp->hitattrs = attrs;
2184            if (!cpu->watchpoint_hit) {
2185                if (wp->flags & BP_CPU &&
2186                    !cc->debug_check_watchpoint(cpu, wp)) {
2187                    wp->flags &= ~BP_WATCHPOINT_HIT;
2188                    continue;
2189                }
2190                cpu->watchpoint_hit = wp;
2191                tb_check_watchpoint(cpu);
2192                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2193                    cpu->exception_index = EXCP_DEBUG;
2194                    cpu_loop_exit(cpu);
2195                } else {
2196                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2197                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2198                    cpu_resume_from_signal(cpu, NULL);
2199                }
2200            }
2201        } else {
2202            wp->flags &= ~BP_WATCHPOINT_HIT;
2203        }
2204    }
2205}
2206
2207/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2208   so these check for a hit then pass through to the normal out-of-line
2209   phys routines.  */
2210static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2211                                  unsigned size, MemTxAttrs attrs)
2212{
2213    MemTxResult res;
2214    uint64_t data;
2215    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2216    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2217
2218    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2219    switch (size) {
2220    case 1:
2221        data = address_space_ldub(as, addr, attrs, &res);
2222        break;
2223    case 2:
2224        data = address_space_lduw(as, addr, attrs, &res);
2225        break;
2226    case 4:
2227        data = address_space_ldl(as, addr, attrs, &res);
2228        break;
2229    default: abort();
2230    }
2231    *pdata = data;
2232    return res;
2233}
2234
2235static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2236                                   uint64_t val, unsigned size,
2237                                   MemTxAttrs attrs)
2238{
2239    MemTxResult res;
2240    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2241    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2242
2243    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2244    switch (size) {
2245    case 1:
2246        address_space_stb(as, addr, val, attrs, &res);
2247        break;
2248    case 2:
2249        address_space_stw(as, addr, val, attrs, &res);
2250        break;
2251    case 4:
2252        address_space_stl(as, addr, val, attrs, &res);
2253        break;
2254    default: abort();
2255    }
2256    return res;
2257}
2258
2259static const MemoryRegionOps watch_mem_ops = {
2260    .read_with_attrs = watch_mem_read,
2261    .write_with_attrs = watch_mem_write,
2262    .endianness = DEVICE_NATIVE_ENDIAN,
2263};
2264
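    /*
     * Subpages cover target pages that are shared by more than one
     * MemoryRegionSection (for instance when a small MMIO region does not
     * start or end on a page boundary).  An access that lands here is
     * simply re-issued through the owning address space at
     * subpage->base + addr; the translation code then uses the
     * sub_section[] table filled in by subpage_register() to find the
     * real section for that offset.
     */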
2265static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2266                                unsigned len, MemTxAttrs attrs)
2267{
2268    subpage_t *subpage = opaque;
2269    uint8_t buf[8];
2270    MemTxResult res;
2271
2272#if defined(DEBUG_SUBPAGE)
2273    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2274           subpage, len, addr);
2275#endif
2276    res = address_space_read(subpage->as, addr + subpage->base,
2277                             attrs, buf, len);
2278    if (res) {
2279        return res;
2280    }
2281    switch (len) {
2282    case 1:
2283        *data = ldub_p(buf);
2284        return MEMTX_OK;
2285    case 2:
2286        *data = lduw_p(buf);
2287        return MEMTX_OK;
2288    case 4:
2289        *data = ldl_p(buf);
2290        return MEMTX_OK;
2291    case 8:
2292        *data = ldq_p(buf);
2293        return MEMTX_OK;
2294    default:
2295        abort();
2296    }
2297}
2298
2299static MemTxResult subpage_write(void *opaque, hwaddr addr,
2300                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2301{
2302    subpage_t *subpage = opaque;
2303    uint8_t buf[8];
2304
2305#if defined(DEBUG_SUBPAGE)
2306    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2307           " value %"PRIx64"\n",
2308           __func__, subpage, len, addr, value);
2309#endif
2310    switch (len) {
2311    case 1:
2312        stb_p(buf, value);
2313        break;
2314    case 2:
2315        stw_p(buf, value);
2316        break;
2317    case 4:
2318        stl_p(buf, value);
2319        break;
2320    case 8:
2321        stq_p(buf, value);
2322        break;
2323    default:
2324        abort();
2325    }
2326    return address_space_write(subpage->as, addr + subpage->base,
2327                               attrs, buf, len);
2328}
2329
2330static bool subpage_accepts(void *opaque, hwaddr addr,
2331                            unsigned len, bool is_write)
2332{
2333    subpage_t *subpage = opaque;
2334#if defined(DEBUG_SUBPAGE)
2335    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2336           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2337#endif
2338
2339    return address_space_access_valid(subpage->as, addr + subpage->base,
2340                                      len, is_write, MEMTXATTRS_UNSPECIFIED);
2341}
2342
2343static bool subpage_accepts_tr(MemoryTransaction *tr)
2344{
2345    MemTxAttrs attr = tr->attr;
2346    subpage_t *subpage = tr->opaque;
2347    hwaddr addr = tr->addr;
2348    unsigned len = tr->size;
2349    bool is_write = tr->rw;
2350
2351#if defined(DEBUG_SUBPAGE)
2352    fprintf(stderr, "%s: subpage %p %c len %u addr " TARGET_FMT_plx
2353            " secure: %d\n",
2354           __func__, subpage, is_write ? 'w' : 'r', len, addr, attr.secure);
2355#endif
2356
2357    return address_space_access_valid(subpage->as, addr + subpage->base,
2358                                      len, is_write, attr);
2359}
2360
2361static const MemoryRegionOps subpage_ops = {
2362    .read_with_attrs = subpage_read,
2363    .write_with_attrs = subpage_write,
2364    .impl.min_access_size = 1,
2365    .impl.max_access_size = 8,
2366    .valid.min_access_size = 1,
2367    .valid.max_access_size = 8,
2368    .valid.accepts = subpage_accepts,
2369    .valid.accepts_tr = subpage_accepts_tr,
2370    .endianness = DEVICE_NATIVE_ENDIAN,
2371};
2372
2373static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2374                             uint16_t section)
2375{
2376    int idx, eidx;
2377
2378    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2379        return -1;
2380    idx = SUBPAGE_IDX(start);
2381    eidx = SUBPAGE_IDX(end);
2382#if defined(DEBUG_SUBPAGE)
2383    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2384           __func__, mmio, start, end, idx, eidx, section);
2385#endif
2386    for (; idx <= eidx; idx++) {
2387        mmio->sub_section[idx] = section;
2388    }
2389
2390    return 0;
2391}
2392
2393static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2394{
2395    subpage_t *mmio;
2396
2397    mmio = g_malloc0(sizeof(subpage_t));
2398
2399    mmio->as = as;
2400    mmio->base = base;
2401    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2402                          NULL, TARGET_PAGE_SIZE);
2403    mmio->iomem.subpage = true;
2404#if defined(DEBUG_SUBPAGE)
2405    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2406           mmio, base, TARGET_PAGE_SIZE);
2407#endif
2408    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2409
2410    return mmio;
2411}
2412
2413static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2414                              MemoryRegion *mr)
2415{
2416    assert(as);
2417    MemoryRegionSection section = {
2418        .address_space = as,
2419        .mr = mr,
2420        .offset_within_address_space = 0,
2421        .offset_within_region = 0,
2422        .size = int128_2_64(),
2423    };
2424
2425    return phys_section_add(map, &section);
2426}
2427
2428MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2429{
2430    int asidx = cpu_asidx_from_attrs(cpu, attrs);
2431    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2432    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2433    MemoryRegionSection *sections = d->map.sections;
2434
2435    return sections[index & ~TARGET_PAGE_MASK].mr;
2436}
2437
2438static void io_mem_init(void)
2439{
2440    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2441    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2442                          NULL, UINT64_MAX);
2443    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2444                          NULL, UINT64_MAX);
2445    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2446                          NULL, UINT64_MAX);
2447}
2448
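    /*
     * Start building a new AddressSpaceDispatch for the listener's address
     * space.  The four well-known sections are registered first so that
     * their indexes match the PHYS_SECTION_* constants assumed elsewhere.
     */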
2449static void mem_begin(MemoryListener *listener)
2450{
2451    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2452    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2453    uint16_t n;
2454
2455    n = dummy_section(&d->map, as, &io_mem_unassigned);
2456    assert(n == PHYS_SECTION_UNASSIGNED);
2457    n = dummy_section(&d->map, as, &io_mem_notdirty);
2458    assert(n == PHYS_SECTION_NOTDIRTY);
2459    n = dummy_section(&d->map, as, &io_mem_rom);
2460    assert(n == PHYS_SECTION_ROM);
2461    n = dummy_section(&d->map, as, &io_mem_watch);
2462    assert(n == PHYS_SECTION_WATCH);
2463
2464    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2465    d->as = as;
2466    as->next_dispatch = d;
2467}
2468
2469static void address_space_dispatch_free(AddressSpaceDispatch *d)
2470{
2471    phys_sections_free(&d->map);
2472    g_free(d);
2473}
2474
2475static void mem_commit(MemoryListener *listener)
2476{
2477    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2478    AddressSpaceDispatch *cur = as->dispatch;
2479    AddressSpaceDispatch *next = as->next_dispatch;
2480
2481    phys_page_compact_all(next, next->map.nodes_nb);
2482
2483    atomic_rcu_set(&as->dispatch, next);
2484    if (cur) {
2485        call_rcu(cur, address_space_dispatch_free, rcu);
2486    }
2487}
2488
2489static void tcg_commit(MemoryListener *listener)
2490{
2491    CPUAddressSpace *cpuas;
2492    AddressSpaceDispatch *d;
2493
2494    /* since each CPU stores ram addresses in its TLB cache, we must
2495       reset the modified entries */
2496    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2497    cpu_reloading_memory_map();
2498    /* The CPU and TLB are protected by the iothread lock.
2499     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2500     * may have split the RCU critical section.
2501     */
2502    d = atomic_rcu_read(&cpuas->as->dispatch);
2503    cpuas->memory_dispatch = d;
2504    tlb_flush(cpuas->cpu, 1);
2505}
2506
2507void address_space_init_dispatch(AddressSpace *as)
2508{
2509    as->dispatch = NULL;
2510    as->dispatch_listener = (MemoryListener) {
2511        .begin = mem_begin,
2512        .commit = mem_commit,
2513        .region_add = mem_add,
2514        .region_nop = mem_add,
2515        .priority = 0,
2516    };
2517    memory_listener_register(&as->dispatch_listener, as);
2518}
2519
2520void address_space_unregister(AddressSpace *as)
2521{
2522    memory_listener_unregister(&as->dispatch_listener);
2523}
2524
2525void address_space_destroy_dispatch(AddressSpace *as)
2526{
2527    AddressSpaceDispatch *d = as->dispatch;
2528
2529    atomic_rcu_set(&as->dispatch, NULL);
2530    if (d) {
2531        call_rcu(d, address_space_dispatch_free, rcu);
2532    }
2533}
2534
2535static void memory_map_init(void)
2536{
2537    system_memory = g_malloc(sizeof(*system_memory));
2538
2539    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2540    address_space_init(&address_space_memory, system_memory, "memory");
2541
2542    system_io = g_malloc(sizeof(*system_io));
2543    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2544                          65536);
2545    address_space_init(&address_space_io, system_io, "I/O");
2546}
2547
2548MemoryRegion *get_system_memory(void)
2549{
2550    return system_memory;
2551}
2552
2553MemoryRegion *get_system_io(void)
2554{
2555    return system_io;
2556}
2557
2558#endif /* !defined(CONFIG_USER_ONLY) */
2559
2560/* physical memory access (slow version, mainly for debug) */
2561#if defined(CONFIG_USER_ONLY)
2562int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2563                        uint8_t *buf, int len, int is_write)
2564{
2565    int l, flags;
2566    target_ulong page;
2567    void * p;
2568
2569    while (len > 0) {
2570        page = addr & TARGET_PAGE_MASK;
2571        l = (page + TARGET_PAGE_SIZE) - addr;
2572        if (l > len)
2573            l = len;
2574        flags = page_get_flags(page);
2575        if (!(flags & PAGE_VALID))
2576            return -1;
2577        if (is_write) {
2578            if (!(flags & PAGE_WRITE))
2579                return -1;
2580            /* XXX: this code should not depend on lock_user */
2581            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2582                return -1;
2583            memcpy(p, buf, l);
2584            unlock_user(p, addr, l);
2585        } else {
2586            if (!(flags & PAGE_READ))
2587                return -1;
2588            /* XXX: this code should not depend on lock_user */
2589            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2590                return -1;
2591            memcpy(buf, p, l);
2592            unlock_user(p, addr, 0);
2593        }
2594        len -= l;
2595        buf += l;
2596        addr += l;
2597    }
2598    return 0;
2599}
2600
2601void cpu_set_mr(Object *obj, Visitor *v, void *opaque,
2602                const char *name, Error **errp)
2603{
2604}
2605
2606#else
2607
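    /*
     * QOM link property setter: resolve the path provided through the
     * visitor into a MemoryRegion, make it the CPU's memory, and create a
     * shareable address space on top of it.
     */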
2608void cpu_set_mr(Object *obj, Visitor *v, void *opaque,
2609                const char *name, Error **errp)
2610{
2611    CPUState *cpu = CPU(obj);
2612    Error *local_err = NULL;
2613    char *path = NULL;
2614
2615    visit_type_str(v, name, &path, &local_err);
2616
2617    if (!local_err && strcmp(path, "") != 0) {
2618        cpu->memory = MEMORY_REGION(object_resolve_link(obj, name, path,
2619                                &local_err));
2620    }
2621
2622    if (local_err) {
2623        error_propagate(errp, local_err);
2624        return;
2625    }
2626
2627    object_ref(OBJECT(cpu->memory));
2628    cpu->as = address_space_init_shareable(cpu->memory, NULL);
2629}
2630
2631static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2632                                     hwaddr length)
2633{
2634    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2635    /* No early return if dirty_log_mask is or becomes 0, because
2636     * cpu_physical_memory_set_dirty_range will still call
2637     * xen_modified_memory.
2638     */
2639    if (dirty_log_mask) {
2640        dirty_log_mask =
2641            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2642    }
2643    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2644        tb_invalidate_phys_range(addr, addr + length);
2645        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2646    }
2647    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2648}
2649
2650static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2651{
2652    unsigned access_size_max = mr->ops->valid.max_access_size;
2653
2654    /* Regions are assumed to support 1-4 byte accesses unless
2655       otherwise specified.  */
2656    if (access_size_max == 0) {
2657        access_size_max = 4;
2658    }
2659
2660    /* Bound the maximum access by the alignment of the address.  */
2661    if (!mr->ops->impl.unaligned) {
2662        unsigned align_size_max = addr & -addr;
2663        if (align_size_max != 0 && align_size_max < access_size_max) {
2664            access_size_max = align_size_max;
2665        }
2666    }
2667
2668    /* Don't attempt accesses larger than the maximum.  */
2669    if (l > access_size_max) {
2670        l = access_size_max;
2671    }
2672    l = pow2floor(l);
2673
2674    return l;
2675}
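    /*
     * Worked example for memory_access_size() above (a sketch, not tied to
     * any particular device): for a region that declares
     * valid.max_access_size == 4 and no unaligned support, an 8-byte
     * request at address 0x102 is first capped to 4 bytes by the region,
     * then to 2 bytes by the address alignment (0x102 & -0x102 == 2), and
     * pow2floor() leaves it at 2.  The caller loops to issue the remaining
     * bytes as further accesses.
     */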
2676
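    /*
     * Take the global iothread lock before an MMIO dispatch if the region
     * requires it and the caller does not hold it yet, and flush any
     * coalesced MMIO that must be ordered before this access.  Returns true
     * when the caller has to drop the lock again once the access is done.
     */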
2677static bool prepare_mmio_access(MemoryRegion *mr)
2678{
2679    bool unlocked = !qemu_mutex_iothread_locked();
2680    bool release_lock = false;
2681
2682    if (unlocked && mr->global_locking) {
2683        qemu_mutex_lock_iothread();
2684        unlocked = false;
2685        release_lock = true;
2686    }
2687    if (mr->flush_coalesced_mmio) {
2688        if (unlocked) {
2689            qemu_mutex_lock_iothread();
2690        }
2691        qemu_flush_coalesced_mmio_buffer();
2692        if (unlocked) {
2693            qemu_mutex_unlock_iothread();
2694        }
2695    }
2696
2697    return release_lock;
2698}
2699
2700/* Called within RCU critical section.  */
2701static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2702                                                MemTxAttrs attrs,
2703                                                const uint8_t *buf,
2704                                                int len, hwaddr addr1,
2705                                                hwaddr l, MemoryRegion *mr)
2706{
2707    uint8_t *ptr;
2708    uint64_t val;
2709    MemTxResult result = MEMTX_OK;
2710    bool release_lock = false;
2711
2712    for (;;) {
2713        if (!memory_access_is_direct(mr, true)) {
2714            release_lock |= prepare_mmio_access(mr);
2715            l = memory_access_size(mr, l, addr1);
2716            /* XXX: could force current_cpu to NULL to avoid
2717               potential bugs */
2718            switch (l) {
2719            case 8:
2720                /* 64 bit write access */
2721                val = ldq_p(buf);
2722                result |= memory_region_dispatch_write(mr, addr1, val, 8,
2723                                                       attrs);
2724                break;
2725            case 4:
2726                /* 32 bit write access */
2727                val = ldl_p(buf);
2728                result |= memory_region_dispatch_write(mr, addr1, val, 4,
2729                                                       attrs);
2730                break;
2731            case 2:
2732                /* 16 bit write access */
2733                val = lduw_p(buf);
2734                result |= memory_region_dispatch_write(mr, addr1, val, 2,
2735                                                       attrs);
2736                break;
2737            case 1:
2738                /* 8 bit write access */
2739                val = ldub_p(buf);
2740                result |= memory_region_dispatch_write(mr, addr1, val, 1,
2741                                                       attrs);
2742                break;
2743            default:
2744                abort();
2745            }
2746        } else {
2747            addr1 += memory_region_get_ram_addr(mr);
2748            /* RAM case */
2749            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2750            memcpy(ptr, buf, l);
2751            invalidate_and_set_dirty(mr, addr1, l);
2752        }
2753
2754        if (release_lock) {
2755            qemu_mutex_unlock_iothread();
2756            release_lock = false;
2757        }
2758
2759        len -= l;
2760        buf += l;
2761        addr += l;
2762
2763        if (!len) {
2764            break;
2765        }
2766
2767        l = len;
2768        mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
2769    }
2770
2771    return result;
2772}
2773
2774MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2775                                const uint8_t *buf, int len)
2776{
2777    hwaddr l;
2778    hwaddr addr1;
2779    MemoryRegion *mr;
2780    MemTxResult result = MEMTX_OK;
2781
2782    if (len > 0) {
2783        rcu_read_lock();
2784        l = len;
2785        mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
2786        result = address_space_write_continue(as, addr, attrs, buf, len,
2787                                              addr1, l, mr);
2788        rcu_read_unlock();
2789    }
2790
2791    return result;
2792}
2793
2794/* Called within RCU critical section.  */
2795MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2796                                        MemTxAttrs attrs, uint8_t *buf,
2797                                        int len, hwaddr addr1, hwaddr l,
2798                                        MemoryRegion *mr)
2799{
2800    uint8_t *ptr;
2801    uint64_t val;
2802    MemTxResult result = MEMTX_OK;
2803    bool release_lock = false;
2804
2805    for (;;) {
2806        if (!memory_access_is_direct(mr, false)) {
2807            /* I/O case */
2808            release_lock |= prepare_mmio_access(mr);
2809            l = memory_access_size(mr, l, addr1);
2810            switch (l) {
2811            case 8:
2812                /* 64 bit read access */
2813                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2814                                                      attrs);
2815                stq_p(buf, val);
2816                break;
2817            case 4:
2818                /* 32 bit read access */
2819                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2820                                                      attrs);
2821                stl_p(buf, val);
2822                break;
2823            case 2:
2824                /* 16 bit read access */
2825                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2826                                                      attrs);
2827                stw_p(buf, val);
2828                break;
2829            case 1:
2830                /* 8 bit read access */
2831                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2832                                                      attrs);
2833                stb_p(buf, val);
2834                break;
2835            default:
2836                abort();
2837            }
2838        } else {
2839            /* RAM case */
2840            ptr = qemu_get_ram_ptr(mr->ram_block,
2841                                   memory_region_get_ram_addr(mr) + addr1);
2842            memcpy(buf, ptr, l);
2843        }
2844
2845        if (release_lock) {
2846            qemu_mutex_unlock_iothread();
2847            release_lock = false;
2848        }
2849
2850        len -= l;
2851        buf += l;
2852        addr += l;
2853
2854        if (!len) {
2855            break;
2856        }
2857
2858        l = len;
2859        mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
2860    }
2861
2862    return result;
2863}
2864
2865MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2866                                    MemTxAttrs attrs, uint8_t *buf, int len)
2867{
2868    hwaddr l;
2869    hwaddr addr1;
2870    MemoryRegion *mr;
2871    MemTxResult result = MEMTX_OK;
2872
2873    if (len > 0) {
2874        rcu_read_lock();
2875        l = len;
2876        mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
2877        result = address_space_read_continue(as, addr, attrs, buf, len,
2878                                             addr1, l, mr);
2879        rcu_read_unlock();
2880    }
2881
2882    return result;
2883}
2884
2885MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2886                             uint8_t *buf, int len, bool is_write)
2887{
2888    if (is_write) {
2889        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2890    } else {
2891        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2892    }
2893}
2894
2895void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2896                            int len, int is_write)
2897{
2898    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2899                     buf, len, is_write);
2900}
2901
2902enum write_rom_type {
2903    WRITE_DATA,
2904    FLUSH_CACHE,
2905};
2906
2907static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2908    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2909{
2910    hwaddr l;
2911    uint8_t *ptr;
2912    hwaddr addr1;
2913    MemoryRegion *mr;
2914
2915    rcu_read_lock();
2916    while (len > 0) {
2917        l = len;
2918        mr = address_space_translate(as, addr, &addr1, &l, true);
2919
2920        if (!(memory_region_is_ram(mr) ||
2921              memory_region_is_romd(mr))) {
2922            if (type == WRITE_DATA) {
2923                address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
2924                                 (uint8_t *) buf, len, true);
2925            } else {
2926                l = memory_access_size(mr, l, addr1);
2927            }
2928        } else {
2929            addr1 += memory_region_get_ram_addr(mr);
2930            /* ROM/RAM case */
2931            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2932            switch (type) {
2933            case WRITE_DATA:
2934                memcpy(ptr, buf, l);
2935                invalidate_and_set_dirty(mr, addr1, l);
2936                break;
2937            case FLUSH_CACHE:
2938                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2939                break;
2940            }
2941        }
2942        len -= l;
2943        buf += l;
2944        addr += l;
2945    }
2946    rcu_read_unlock();
2947}
2948
2949/* used for ROM loading : can write in RAM and ROM */
2950void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2951                                   const uint8_t *buf, int len)
2952{
2953    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2954}
2955
2956void cpu_flush_icache_range(hwaddr start, int len)
2957{
2958    /*
2959     * This function should do the same thing as an icache flush that was
2960     * triggered from within the guest. For TCG we are always cache coherent,
2961     * so there is no need to flush anything. For KVM / Xen we need to flush
2962     * the host's instruction cache at least.
2963     */
2964    if (tcg_enabled()) {
2965        return;
2966    }
2967
2968    cpu_physical_memory_write_rom_internal(&address_space_memory,
2969                                           start, NULL, len, FLUSH_CACHE);
2970}
2971
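    /*
     * A single, statically allocated bounce buffer backs address_space_map()
     * when the target is not directly accessible RAM (e.g. MMIO).  Only one
     * mapping can use it at a time; other would-be users register a
     * callback with cpu_register_map_client() and retry when it is free.
     */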
2972typedef struct {
2973    MemoryRegion *mr;
2974    void *buffer;
2975    hwaddr addr;
2976    hwaddr len;
2977    bool in_use;
2978} BounceBuffer;
2979
2980static BounceBuffer bounce;
2981
2982typedef struct MapClient {
2983    QEMUBH *bh;
2984    QLIST_ENTRY(MapClient) link;
2985} MapClient;
2986
2987QemuMutex map_client_list_lock;
2988static QLIST_HEAD(map_client_list, MapClient) map_client_list
2989    = QLIST_HEAD_INITIALIZER(map_client_list);
2990
2991static void cpu_unregister_map_client_do(MapClient *client)
2992{
2993    QLIST_REMOVE(client, link);
2994    g_free(client);
2995}
2996
2997static void cpu_notify_map_clients_locked(void)
2998{
2999    MapClient *client;
3000
3001    while (!QLIST_EMPTY(&map_client_list)) {
3002        client = QLIST_FIRST(&map_client_list);
3003        qemu_bh_schedule(client->bh);
3004        cpu_unregister_map_client_do(client);
3005    }
3006}
3007
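    /*
     * Register @bh to be scheduled as soon as the bounce buffer becomes
     * free again (or right away if it is already free), so that a failed
     * address_space_map() can be retried.
     */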
3008void cpu_register_map_client(QEMUBH *bh)
3009{
3010    MapClient *client = g_malloc(sizeof(*client));
3011
3012    qemu_mutex_lock(&map_client_list_lock);
3013    client->bh = bh;
3014    QLIST_INSERT_HEAD(&map_client_list, client, link);
3015    if (!atomic_read(&bounce.in_use)) {
3016        cpu_notify_map_clients_locked();
3017    }
3018    qemu_mutex_unlock(&map_client_list_lock);
3019}
3020
3021void cpu_exec_init_all(void)
3022{
3023    qemu_mutex_init(&ram_list.mutex);
3024    io_mem_init();
3025    memory_map_init();
3026    qemu_mutex_init(&map_client_list_lock);
3027}
3028
3029void cpu_unregister_map_client(QEMUBH *bh)
3030{
3031    MapClient *client;
3032
3033    qemu_mutex_lock(&map_client_list_lock);
3034    QLIST_FOREACH(client, &map_client_list, link) {
3035        if (client->bh == bh) {
3036            cpu_unregister_map_client_do(client);
3037            break;
3038        }
3039    }
3040    qemu_mutex_unlock(&map_client_list_lock);
3041}
3042
3043static void cpu_notify_map_clients(void)
3044{
3045    qemu_mutex_lock(&map_client_list_lock);
3046    cpu_notify_map_clients_locked();
3047    qemu_mutex_unlock(&map_client_list_lock);
3048}
3049
3050bool address_space_access_valid(AddressSpace *as, hwaddr addr,
3051                                int len, bool is_write,
3052                                MemTxAttrs attr)
3053{
3054    MemoryRegion *mr;
3055    hwaddr l, xlat;
3056
3057    rcu_read_lock();
3058    while (len > 0) {
3059        l = len;
3060        mr = address_space_translate(as, addr, &xlat, &l, is_write);
3061        if (!memory_access_is_direct(mr, is_write)) {
3062            l = memory_access_size(mr, l, addr);
3063            if (!memory_region_access_valid(mr, xlat, l, is_write, attr)) {
                    rcu_read_unlock();
3064                return false;
3065            }
3066        }
3067
3068        len -= l;
3069        addr += l;
3070    }
3071    rcu_read_unlock();
3072    return true;
3073}
3074
3075/* Map a physical memory region into a host virtual address.
3076 * May map a subset of the requested range, given by and returned in *plen.
3077 * May return NULL if resources needed to perform the mapping are exhausted.
3078 * Use only for reads OR writes - not for read-modify-write operations.
3079 * Use cpu_register_map_client() to know when retrying the map operation is
3080 * likely to succeed.
3081 */
3082void *address_space_map(AddressSpace *as,
3083                        hwaddr addr,
3084                        hwaddr *plen,
3085                        bool is_write)
3086{
3087    hwaddr len = *plen;
3088    hwaddr done = 0;
3089    hwaddr l, xlat, base;
3090    MemoryRegion *mr, *this_mr;
3091    ram_addr_t raddr;
3092    void *ptr;
3093
3094    if (len == 0) {
3095        return NULL;
3096    }
3097
3098    l = len;
3099    rcu_read_lock();
3100    mr = address_space_translate(as, addr, &xlat, &l, is_write);
3101
3102    if (!memory_access_is_direct(mr, is_write)) {
3103        if (atomic_xchg(&bounce.in_use, true)) {
3104            rcu_read_unlock();
3105            return NULL;
3106        }
3107        /* Avoid unbounded allocations */
3108        l = MIN(l, TARGET_PAGE_SIZE);
3109        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
3110        bounce.addr = addr;
3111        bounce.len = l;
3112
3113        memory_region_ref(mr);
3114        bounce.mr = mr;
3115        if (!is_write) {
3116            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3117                               bounce.buffer, l);
3118        }
3119
3120        rcu_read_unlock();
3121        *plen = l;
3122        return bounce.buffer;
3123    }
3124
3125    base = xlat;
3126    raddr = memory_region_get_ram_addr(mr);
3127
3128    for (;;) {
3129        len -= l;
3130        addr += l;
3131        done += l;
3132        if (len == 0) {
3133            break;
3134        }
3135
3136        l = len;
3137        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3138        if (this_mr != mr || xlat != base + done) {
3139            break;
3140        }
3141    }
3142
3143    memory_region_ref(mr);
3144    *plen = done;
3145    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3146    rcu_read_unlock();
3147
3148    return ptr;
3149}
3150
3151/* Unmaps a memory region previously mapped by address_space_map().
3152 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3153 * the amount of memory that was actually read or written by the caller.
3154 */
3155void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3156                         int is_write, hwaddr access_len)
3157{
3158    if (buffer != bounce.buffer) {
3159        MemoryRegion *mr;
3160        ram_addr_t addr1;
3161
3162        mr = qemu_ram_addr_from_host(buffer, &addr1);
3163        assert(mr != NULL);
3164        if (is_write) {
3165            invalidate_and_set_dirty(mr, addr1, access_len);
3166        }
3167        if (xen_enabled()) {
3168            xen_invalidate_map_cache_entry(buffer);
3169        }
3170        memory_region_unref(mr);
3171        return;
3172    }
3173    if (is_write) {
3174        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3175                            bounce.buffer, access_len);
3176    }
3177    qemu_vfree(bounce.buffer);
3178    bounce.buffer = NULL;
3179    memory_region_unref(bounce.mr);
3180    atomic_mb_set(&bounce.in_use, false);
3181    cpu_notify_map_clients();
3182}
3183
3184void *cpu_physical_memory_map(hwaddr addr,
3185                              hwaddr *plen,
3186                              int is_write)
3187{
3188    return address_space_map(&address_space_memory, addr, plen, is_write);
3189}
3190
3191void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3192                               int is_write, hwaddr access_len)
3193{
3194    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3195}
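    /*
     * Typical map/use/unmap pattern (a sketch; "as", "addr" and "size" are
     * the caller's values, not defined here):
     *
     *     hwaddr plen = size;
     *     void *p = address_space_map(as, addr, &plen, true);
     *     if (p) {
     *         ... produce at most plen bytes into p ...
     *         address_space_unmap(as, p, plen, true, plen);
     *     } else {
     *         ... register a callback with cpu_register_map_client() and
     *         retry later, or fall back to address_space_rw() ...
     *     }
     *
     * Note that plen on return from address_space_map() may be smaller
     * than the size that was requested.
     */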
3196
3197/* warning: addr must be aligned */
3198static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3199                                                  MemTxAttrs attrs,
3200                                                  MemTxResult *result,
3201                                                  enum device_endian endian)
3202{
3203    uint8_t *ptr;
3204    uint64_t val;
3205    MemoryRegion *mr;
3206    hwaddr l = 4;
3207    hwaddr addr1;
3208    MemTxResult r;
3209    bool release_lock = false;
3210
3211    rcu_read_lock();
3212    mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
3213    if (l < 4 || !memory_access_is_direct(mr, false)) {
3214        release_lock |= prepare_mmio_access(mr);
3215
3216        /* I/O case */
3217        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3218#if defined(TARGET_WORDS_BIGENDIAN)
3219        if (endian == DEVICE_LITTLE_ENDIAN) {
3220            val = bswap32(val);
3221        }
3222#else
3223        if (endian == DEVICE_BIG_ENDIAN) {
3224            val = bswap32(val);
3225        }
3226#endif
3227    } else {
3228        /* RAM case */
3229        ptr = qemu_get_ram_ptr(mr->ram_block,
3230                               (memory_region_get_ram_addr(mr)
3231                                & TARGET_PAGE_MASK)
3232                               + addr1);
3233        switch (endian) {
3234        case DEVICE_LITTLE_ENDIAN:
3235            val = ldl_le_p(ptr);
3236            break;
3237        case DEVICE_BIG_ENDIAN:
3238            val = ldl_be_p(ptr);
3239            break;
3240        default:
3241            val = ldl_p(ptr);
3242            break;
3243        }
3244        r = MEMTX_OK;
3245    }
3246    if (result) {
3247        *result = r;
3248    }
3249    if (release_lock) {
3250        qemu_mutex_unlock_iothread();
3251    }
3252    rcu_read_unlock();
3253    return val;
3254}
3255
3256uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3257                           MemTxAttrs attrs, MemTxResult *result)
3258{
3259    return address_space_ldl_internal(as, addr, attrs, result,
3260                                      DEVICE_NATIVE_ENDIAN);
3261}
3262
3263uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3264                              MemTxAttrs attrs, MemTxResult *result)
3265{
3266    return address_space_ldl_internal(as, addr, attrs, result,
3267                                      DEVICE_LITTLE_ENDIAN);
3268}
3269
3270uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3271                              MemTxAttrs attrs, MemTxResult *result)
3272{
3273    return address_space_ldl_internal(as, addr, attrs, result,
3274                                      DEVICE_BIG_ENDIAN);
3275}
3276
3277uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3278{
3279    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3280}
3281
3282uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3283{
3284    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3285}
3286
3287uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3288{
3289    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3290}
3291
3292/* warning: addr must be aligned */
3293static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3294                                                  MemTxAttrs attrs,
3295                                                  MemTxResult *result,
3296                                                  enum device_endian endian)
3297{
3298    uint8_t *ptr;
3299    uint64_t val;
3300    MemoryRegion *mr;
3301    hwaddr l = 8;
3302    hwaddr addr1;
3303    MemTxResult r;
3304    bool release_lock = false;
3305
3306    rcu_read_lock();
3307    mr = address_space_translate_attr(as, addr, &addr1, &l,
3308                                 false, &attrs);
3309    if (l < 8 || !memory_access_is_direct(mr, false)) {
3310        release_lock |= prepare_mmio_access(mr);
3311
3312        /* I/O case */
3313        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3314#if defined(TARGET_WORDS_BIGENDIAN)
3315        if (endian == DEVICE_LITTLE_ENDIAN) {
3316            val = bswap64(val);
3317        }
3318#else
3319        if (endian == DEVICE_BIG_ENDIAN) {
3320            val = bswap64(val);
3321        }
3322#endif
3323    } else {
3324        /* RAM case */
3325        ptr = qemu_get_ram_ptr(mr->ram_block,
3326                               (memory_region_get_ram_addr(mr)
3327                                & TARGET_PAGE_MASK)
3328                               + addr1);
3329        switch (endian) {
3330        case DEVICE_LITTLE_ENDIAN:
3331            val = ldq_le_p(ptr);
3332            break;
3333        case DEVICE_BIG_ENDIAN:
3334            val = ldq_be_p(ptr);
3335            break;
3336        default:
3337            val = ldq_p(ptr);
3338            break;
3339        }
3340        r = MEMTX_OK;
3341    }
3342    if (result) {
3343        *result = r;
3344    }
3345    if (release_lock) {
3346        qemu_mutex_unlock_iothread();
3347    }
3348    rcu_read_unlock();
3349    return val;
3350}
3351
3352uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3353                           MemTxAttrs attrs, MemTxResult *result)
3354{
3355    return address_space_ldq_internal(as, addr, attrs, result,
3356                                      DEVICE_NATIVE_ENDIAN);
3357}
3358
3359uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3360                           MemTxAttrs attrs, MemTxResult *result)
3361{
3362    return address_space_ldq_internal(as, addr, attrs, result,
3363                                      DEVICE_LITTLE_ENDIAN);
3364}
3365
3366uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3367                           MemTxAttrs attrs, MemTxResult *result)
3368{
3369    return address_space_ldq_internal(as, addr, attrs, result,
3370                                      DEVICE_BIG_ENDIAN);
3371}
3372
3373uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3374{
3375    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3376}
3377
3378uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3379{
3380    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3381}
3382
3383uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3384{
3385    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3386}
3387
3388/* XXX: optimize */
3389uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3390                            MemTxAttrs attrs, MemTxResult *result)
3391{
3392    uint8_t val;
3393    MemTxResult r;
3394
3395    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3396    if (result) {
3397        *result = r;
3398    }
3399    return val;
3400}
3401
3402uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3403{
3404    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3405}
3406
3407/* warning: addr must be aligned */
3408static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3409                                                   hwaddr addr,
3410                                                   MemTxAttrs attrs,
3411                                                   MemTxResult *result,
3412                                                   enum device_endian endian)
3413{
3414    uint8_t *ptr;
3415    uint64_t val;
3416    MemoryRegion *mr;
3417    hwaddr l = 2;
3418    hwaddr addr1;
3419    MemTxResult r;
3420    bool release_lock = false;
3421
3422    rcu_read_lock();
3423    mr = address_space_translate_attr(as, addr, &addr1, &l,
3424                                 false, &attrs);
3425    if (l < 2 || !memory_access_is_direct(mr, false)) {
3426        release_lock |= prepare_mmio_access(mr);
3427
3428        /* I/O case */
3429        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3430#if defined(TARGET_WORDS_BIGENDIAN)
3431        if (endian == DEVICE_LITTLE_ENDIAN) {
3432            val = bswap16(val);
3433        }
3434#else
3435        if (endian == DEVICE_BIG_ENDIAN) {
3436            val = bswap16(val);
3437        }
3438#endif
3439    } else {
3440        /* RAM case */
3441        ptr = qemu_get_ram_ptr(mr->ram_block,
3442                               (memory_region_get_ram_addr(mr)
3443                                & TARGET_PAGE_MASK)
3444                               + addr1);
3445        switch (endian) {
3446        case DEVICE_LITTLE_ENDIAN:
3447            val = lduw_le_p(ptr);
3448            break;
3449        case DEVICE_BIG_ENDIAN:
3450            val = lduw_be_p(ptr);
3451            break;
3452        default:
3453            val = lduw_p(ptr);
3454            break;
3455        }
3456        r = MEMTX_OK;
3457    }
3458    if (result) {
3459        *result = r;
3460    }
3461    if (release_lock) {
3462        qemu_mutex_unlock_iothread();
3463    }
3464    rcu_read_unlock();
3465    return val;
3466}
3467
3468uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3469                           MemTxAttrs attrs, MemTxResult *result)
3470{
3471    return address_space_lduw_internal(as, addr, attrs, result,
3472                                       DEVICE_NATIVE_ENDIAN);
3473}
3474
3475uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3476                           MemTxAttrs attrs, MemTxResult *result)
3477{
3478    return address_space_lduw_internal(as, addr, attrs, result,
3479                                       DEVICE_LITTLE_ENDIAN);
3480}
3481
3482uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3483                           MemTxAttrs attrs, MemTxResult *result)
3484{
3485    return address_space_lduw_internal(as, addr, attrs, result,
3486                                       DEVICE_BIG_ENDIAN);
3487}
3488
3489uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3490{
3491    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3492}
3493
3494uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3495{
3496    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3497}
3498
3499uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3500{
3501    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3502}
3503
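/*
 * Illustrative sketch (not part of the original file): issuing a 16-bit load
 * with explicit memory transaction attributes, here a secure access.  Whether
 * the secure bit has any effect depends on the machine's memory map; the
 * helper name is hypothetical.
 */
static inline uint32_t example_lduw_secure(AddressSpace *as, hwaddr addr)
{
    MemTxAttrs attrs = { .secure = 1 };
    MemTxResult res;
    uint32_t val = address_space_lduw(as, addr, attrs, &res);

    return res == MEMTX_OK ? val : 0;
}
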
3504/* warning: addr must be aligned. The ram page is not marked as dirty
3505   and the code inside is not invalidated. It is useful if the dirty
3506   bits are used to track modified PTEs */
3507void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3508                                MemTxAttrs attrs, MemTxResult *result)
3509{
3510    uint8_t *ptr;
3511    MemoryRegion *mr;
3512    hwaddr l = 4;
3513    hwaddr addr1;
3514    MemTxResult r;
3515    uint8_t dirty_log_mask;
3516    bool release_lock = false;
3517
3518    rcu_read_lock();
3519    mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
3521    if (l < 4 || !memory_access_is_direct(mr, true)) {
3522        release_lock |= prepare_mmio_access(mr);
3523
3524        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3525    } else {
3526        addr1 += memory_region_get_ram_addr(mr);
3527        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3528        stl_p(ptr, val);
3529
3530        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3531        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3532        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3533        r = MEMTX_OK;
3534    }
3535    if (result) {
3536        *result = r;
3537    }
3538    if (release_lock) {
3539        qemu_mutex_unlock_iothread();
3540    }
3541    rcu_read_unlock();
3542}
3543
3544void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3545{
3546    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3547}
3548
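/*
 * Illustrative sketch (not part of the original file): a target MMU helper
 * could set a software "accessed" bit in a guest PTE with
 * stl_phys_notdirty(), so the page holding the page table is not flagged
 * dirty and any translated code on it stays valid.  The 32-bit PTE layout
 * and the 0x20 bit are assumptions, not a real architecture.
 */
static inline void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    stl_phys_notdirty(as, pte_addr, pte | 0x20);
}
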
3549/* warning: addr must be aligned */
3550static inline void address_space_stl_internal(AddressSpace *as,
3551                                              hwaddr addr, uint32_t val,
3552                                              MemTxAttrs attrs,
3553                                              MemTxResult *result,
3554                                              enum device_endian endian)
3555{
3556    uint8_t *ptr;
3557    MemoryRegion *mr;
3558    hwaddr l = 4;
3559    hwaddr addr1;
3560    MemTxResult r;
3561    bool release_lock = false;
3562
3563    rcu_read_lock();
3564    mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
3566    if (l < 4 || !memory_access_is_direct(mr, true)) {
3567        release_lock |= prepare_mmio_access(mr);
3568
3569#if defined(TARGET_WORDS_BIGENDIAN)
3570        if (endian == DEVICE_LITTLE_ENDIAN) {
3571            val = bswap32(val);
3572        }
3573#else
3574        if (endian == DEVICE_BIG_ENDIAN) {
3575            val = bswap32(val);
3576        }
3577#endif
3578        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3579    } else {
3580        /* RAM case */
3581        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3582        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3583        switch (endian) {
3584        case DEVICE_LITTLE_ENDIAN:
3585            stl_le_p(ptr, val);
3586            break;
3587        case DEVICE_BIG_ENDIAN:
3588            stl_be_p(ptr, val);
3589            break;
3590        default:
3591            stl_p(ptr, val);
3592            break;
3593        }
3594        invalidate_and_set_dirty(mr, addr1, 4);
3595        r = MEMTX_OK;
3596    }
3597    if (result) {
3598        *result = r;
3599    }
3600    if (release_lock) {
3601        qemu_mutex_unlock_iothread();
3602    }
3603    rcu_read_unlock();
3604}
3605
3606void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3607                       MemTxAttrs attrs, MemTxResult *result)
3608{
3609    address_space_stl_internal(as, addr, val, attrs, result,
3610                               DEVICE_NATIVE_ENDIAN);
3611}
3612
3613void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3614                       MemTxAttrs attrs, MemTxResult *result)
3615{
3616    address_space_stl_internal(as, addr, val, attrs, result,
3617                               DEVICE_LITTLE_ENDIAN);
3618}
3619
3620void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3621                       MemTxAttrs attrs, MemTxResult *result)
3622{
3623    address_space_stl_internal(as, addr, val, attrs, result,
3624                               DEVICE_BIG_ENDIAN);
3625}
3626
3627void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3628{
3629    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3630}
3631
3632void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3633{
3634    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3635}
3636
3637void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3638{
3639    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3640}
3641
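/*
 * Illustrative sketch (not part of the original file): writing a 32-bit
 * register of a little-endian device and reporting whether the write was
 * accepted.  The helper name is hypothetical.
 */
static inline bool example_stl_le_checked(AddressSpace *as, hwaddr reg,
                                          uint32_t val)
{
    MemTxResult res;

    address_space_stl_le(as, reg, val, MEMTXATTRS_UNSPECIFIED, &res);
    return res == MEMTX_OK;
}
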
3642/* XXX: optimize */
3643void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3644                       MemTxAttrs attrs, MemTxResult *result)
3645{
3646    uint8_t v = val;
3647    MemTxResult r;
3648
3649    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3650    if (result) {
3651        *result = r;
3652    }
3653}
3654
3655void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3656{
3657    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3658}
3659
3660/* warning: addr must be aligned */
3661static inline void address_space_stw_internal(AddressSpace *as,
3662                                              hwaddr addr, uint32_t val,
3663                                              MemTxAttrs attrs,
3664                                              MemTxResult *result,
3665                                              enum device_endian endian)
3666{
3667    uint8_t *ptr;
3668    MemoryRegion *mr;
3669    hwaddr l = 2;
3670    hwaddr addr1;
3671    MemTxResult r;
3672    bool release_lock = false;
3673
3674    rcu_read_lock();
3675    mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
3676    if (l < 2 || !memory_access_is_direct(mr, true)) {
3677        release_lock |= prepare_mmio_access(mr);
3678
3679#if defined(TARGET_WORDS_BIGENDIAN)
3680        if (endian == DEVICE_LITTLE_ENDIAN) {
3681            val = bswap16(val);
3682        }
3683#else
3684        if (endian == DEVICE_BIG_ENDIAN) {
3685            val = bswap16(val);
3686        }
3687#endif
3688        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3689    } else {
3690        /* RAM case */
3691        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3692        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3693        switch (endian) {
3694        case DEVICE_LITTLE_ENDIAN:
3695            stw_le_p(ptr, val);
3696            break;
3697        case DEVICE_BIG_ENDIAN:
3698            stw_be_p(ptr, val);
3699            break;
3700        default:
3701            stw_p(ptr, val);
3702            break;
3703        }
3704        invalidate_and_set_dirty(mr, addr1, 2);
3705        r = MEMTX_OK;
3706    }
3707    if (result) {
3708        *result = r;
3709    }
3710    if (release_lock) {
3711        qemu_mutex_unlock_iothread();
3712    }
3713    rcu_read_unlock();
3714}
3715
3716void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3717                       MemTxAttrs attrs, MemTxResult *result)
3718{
3719    address_space_stw_internal(as, addr, val, attrs, result,
3720                               DEVICE_NATIVE_ENDIAN);
3721}
3722
3723void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3724                       MemTxAttrs attrs, MemTxResult *result)
3725{
3726    address_space_stw_internal(as, addr, val, attrs, result,
3727                               DEVICE_LITTLE_ENDIAN);
3728}
3729
3730void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3731                       MemTxAttrs attrs, MemTxResult *result)
3732{
3733    address_space_stw_internal(as, addr, val, attrs, result,
3734                               DEVICE_BIG_ENDIAN);
3735}
3736
3737void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3738{
3739    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3740}
3741
3742void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3743{
3744    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3745}
3746
3747void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3748{
3749    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3750}
3751
3752/* XXX: optimize */
3753void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3754                       MemTxAttrs attrs, MemTxResult *result)
3755{
3756    MemTxResult r;
3757    val = tswap64(val);
3758    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3759    if (result) {
3760        *result = r;
3761    }
3762}
3763
3764void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3765                       MemTxAttrs attrs, MemTxResult *result)
3766{
3767    MemTxResult r;
3768    val = cpu_to_le64(val);
3769    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3770    if (result) {
3771        *result = r;
3772    }
3773}

3774void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3775                       MemTxAttrs attrs, MemTxResult *result)
3776{
3777    MemTxResult r;
3778    val = cpu_to_be64(val);
3779    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3780    if (result) {
3781        *result = r;
3782    }
3783}
3784
3785void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3786{
3787    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3788}
3789
3790void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3791{
3792    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3793}
3794
3795void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3796{
3797    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3798}
3799
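/*
 * Illustrative sketch (not part of the original file): address_space_stq()
 * stores in the target's byte order (note the tswap64() in
 * address_space_stq() above), while the _le/_be variants force a fixed
 * order.  A 64-bit register of a little-endian device would therefore be
 * written as follows; the helper name is hypothetical.
 */
static inline void example_write_le_u64(AddressSpace *as, hwaddr reg,
                                        uint64_t val)
{
    stq_le_phys(as, reg, val);
}
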
3800/* virtual memory access for debug (includes writing to ROM) */
3801int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3802                        uint8_t *buf, int len, int is_write)
3803{
3804    int l;
3805    hwaddr phys_addr;
3806    target_ulong page;
3807
3808    while (len > 0) {
3809        int asidx;
3810        MemTxAttrs attrs;
3811
3812        page = addr & TARGET_PAGE_MASK;
3813        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3814        asidx = cpu_asidx_from_attrs(cpu, attrs);
3815        /* if no physical page mapped, return an error */
3816        if (phys_addr == -1) {
3817            return -1;
        }
3818        l = (page + TARGET_PAGE_SIZE) - addr;
3819        if (l > len) {
3820            l = len;
        }
3821        phys_addr += (addr & ~TARGET_PAGE_MASK);
3822        if (is_write) {
3823            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3824                                          phys_addr, buf, l);
3825        } else {
3826            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3827                             MEMTXATTRS_UNSPECIFIED,
3828                             buf, l, 0);
3829        }
3830        len -= l;
3831        buf += l;
3832        addr += l;
3833    }
3834    return 0;
3835}
3836
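/*
 * Illustrative sketch (not part of the original file): how a debugger front
 * end might read guest-virtual memory through the CPU's current address
 * space.  Returns true on success; the helper name is hypothetical.
 */
static inline bool example_debug_read(CPUState *cpu, target_ulong vaddr,
                                      uint8_t *buf, int len)
{
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0) == 0;
}
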
3837/*
3838 * Allows code that needs to deal with migration bitmaps etc to still be built
3839 * target independent.
3840 */
3841size_t qemu_target_page_bits(void)
3842{
3843    return TARGET_PAGE_BITS;
3844}
3845
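/*
 * Illustrative sketch (not part of the original file): target-independent
 * code (e.g. migration bitmap handling) can derive the page size and mask
 * from qemu_target_page_bits() instead of using TARGET_PAGE_SIZE directly.
 */
static inline size_t example_target_page_size(void)
{
    return (size_t)1 << qemu_target_page_bits();
}
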
3846#endif
3847
3848/*
3849 * A helper function for the _utterly broken_ virtio device model to find out if
3850 * it's running on a big endian machine. Don't do this at home kids!
3851 */
3852bool target_words_bigendian(void);
3853bool target_words_bigendian(void)
3854{
3855#if defined(TARGET_WORDS_BIGENDIAN)
3856    return true;
3857#else
3858    return false;
3859#endif
3860}
3861
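/*
 * Illustrative sketch (not part of the original file): legacy virtio is
 * guest-endian, so device code may consult this predicate when converting
 * guest-visible fields.  The helper below assumes a little-endian host and
 * is purely hypothetical.
 */
static inline uint32_t example_virtio_to_host_u32(uint32_t guest_val)
{
    return target_words_bigendian() ? bswap32(guest_val) : guest_val;
}
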
3862#ifndef CONFIG_USER_ONLY
3863bool cpu_physical_memory_is_io(hwaddr phys_addr)
3864{
3865    MemoryRegion *mr;
3866    hwaddr l = 1;
3867    bool res;
3868
3869    rcu_read_lock();
3870    mr = address_space_translate(&address_space_memory,
3871                                 phys_addr, &phys_addr, &l, false);
3872
3873    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3874    rcu_read_unlock();
3875    return res;
3876}
3877
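/*
 * Illustrative sketch (not part of the original file): a guest memory dumper
 * could use cpu_physical_memory_is_io() to skip device regions and only copy
 * RAM and ROM-device pages.  The helper name is hypothetical.
 */
static inline bool example_page_is_dumpable(hwaddr paddr)
{
    return !cpu_physical_memory_is_io(paddr & TARGET_PAGE_MASK);
}
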
3878int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3879{
3880    RAMBlock *block;
3881    int ret = 0;
3882
3883    rcu_read_lock();
3884    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3885        ret = func(block->idstr, block->host, block->offset,
3886                   block->used_length, opaque);
3887        if (ret) {
3888            break;
3889        }
3890    }
3891    rcu_read_unlock();
3892    return ret;
3893}
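
/*
 * Illustrative sketch (not part of the original file): a RAMBlockIterFunc
 * callback matching the call above, used here to add up the used length of
 * every RAM block.  The function names are hypothetical.
 */
static int example_count_ram_cb(const char *block_name, void *host_addr,
                                ram_addr_t offset, ram_addr_t length,
                                void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0; /* keep iterating */
}

static inline uint64_t example_total_ram_size(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_count_ram_cb, &total);
    return total;
}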
3894#endif
3895
3896/* FIXME: rewrite - badly needed */
3897
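/*
 * Added description (inferred from the code below; not original
 * documentation): recompute the combined halt/reset state of @cpu from its
 * reset_pin, halt_pin and arch_halt_pin inputs.  @change, if non-NULL,
 * points at the pin being driven (e.g. &cpu->reset_pin) and is set to @val
 * before the state is re-evaluated.  @force re-applies the resulting state
 * even when it did not change.
 */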
3898void cpu_halt_reset_common(CPUState *cpu, bool *change, bool val, bool force)
3899{
3900    bool new_val;
3901    bool old_val = cpu->reset_pin || cpu->halt_pin || cpu->arch_halt_pin;
3902
3903    if (change) {
3904        *change = val;
3905    }
3906    new_val = cpu->reset_pin || cpu->halt_pin || cpu->arch_halt_pin;
3907
3908    if (new_val) {
3909        cpu_interrupt(cpu, CPU_INTERRUPT_HALT);
3910    }
3911
3912    if (new_val == old_val && !force) {
3913        return;
3914    }
3915
3916    if (!new_val) {
3917        cpu_reset_interrupt(cpu, CPU_INTERRUPT_HALT);
3918        cpu_interrupt(cpu, CPU_INTERRUPT_EXITTB);
3919        cpu->halted = 0;
3920    }
3921}
3922
3923void cpu_reset_gpio(void *opaque, int irq, int level)
3924{
3925    CPUState *cpu = CPU(opaque);
3926
3927    if (level == cpu->reset_pin) {
3928        return;
3929    }
3930    if (level || cpu->reset_pin) {
3931        cpu_reset(cpu);
3932        cpu_halt_reset_common(cpu, &cpu->reset_pin, level, true);
3933    } else {
3934        cpu_halt_reset_common(cpu, &cpu->reset_pin, level, false);
3935    }
3936}
3937
3938void cpu_halt_gpio(void *opaque, int irq, int level)
3939{
3940    CPUState *cpu = CPU(opaque);
3941
3942    cpu_halt_reset_common(cpu, &cpu->halt_pin, level, false);
3943}
3944
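/*
 * Illustrative sketch (not part of the original file): a board or SoC model
 * could wrap these handlers in qemu_irq lines and drive them like this.  The
 * wiring shown is hypothetical; qemu_allocate_irq()/qemu_set_irq() are the
 * generic IRQ helpers.
 */
static inline void example_pulse_halt(CPUState *cpu)
{
    qemu_irq halt = qemu_allocate_irq(cpu_halt_gpio, cpu, 0);

    qemu_set_irq(halt, 1); /* assert halt */
    qemu_set_irq(halt, 0); /* release it again */
}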