qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "qapi/error.h"
  21#ifndef _WIN32
  22#endif
  23
  24#include "qemu/cutils.h"
  25#include "cpu.h"
  26#include "exec/exec-all.h"
  27#include "tcg.h"
  28#include "hw/qdev-core.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#include "hw/xen/xen.h"
  32#endif
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "qemu/timer.h"
  36#include "qemu/config-file.h"
  37#include "qemu/error-report.h"
  38#if defined(CONFIG_USER_ONLY)
  39#include "qemu.h"
  40#else /* !CONFIG_USER_ONLY */
  41#include "hw/hw.h"
  42#include "exec/memory.h"
  43#include "exec/ioport.h"
  44#include "sysemu/dma.h"
  45#include "exec/address-spaces.h"
  46#include "sysemu/xen-mapcache.h"
  47#include "trace.h"
  48#endif
  49#include "exec/cpu-all.h"
  50#include "qemu/rcu_queue.h"
  51#include "qemu/main-loop.h"
  52#include "translate-all.h"
  53#include "sysemu/replay.h"
  54
  55#include "exec/memory-internal.h"
  56#include "exec/ram_addr.h"
  57#include "exec/log.h"
  58
  59#include "migration/vmstate.h"
  60
  61#include "qemu/range.h"
  62#ifndef _WIN32
  63#include "qemu/mmap-alloc.h"
  64#endif
  65#ifdef _WIN32
  66#include <io.h>
  67#endif
  68
  69//#define DEBUG_SUBPAGE
  70
  71#if !defined(CONFIG_USER_ONLY)
  72/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  73 * are protected by the ramlist lock.
  74 */
  75RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  76
  77static MemoryRegion *system_memory;
  78static MemoryRegion *system_io;
  79
  80AddressSpace address_space_io;
  81AddressSpace address_space_memory;
  82
  83MemoryRegion io_mem_rom, io_mem_notdirty;
  84static MemoryRegion io_mem_unassigned;
  85
  86/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  87#define RAM_PREALLOC   (1 << 0)
  88
  89/* RAM is mmap-ed with MAP_SHARED */
  90#define RAM_SHARED     (1 << 1)
  91
  92/* Only a portion of RAM (used_length) is actually used, and migrated.
  93 * This used_length size can change across reboots.
  94 */
  95#define RAM_RESIZEABLE (1 << 2)
  96
  97/* RAM is backed by an mmapped file.
  98 */
  99#define RAM_FILE (1 << 3)
 100#endif
 101
 102#ifdef TARGET_PAGE_BITS_VARY
 103int target_page_bits;
 104bool target_page_bits_decided;
 105#endif
 106
 107struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
 108/* current CPU in the current thread. It is only valid inside
 109   cpu_exec() */
 110__thread CPUState *current_cpu;
 111/* 0 = Do not count executed instructions.
 112   1 = Precise instruction counting.
 113   2 = Adaptive rate instruction counting.  */
 114int use_icount;
 115
 116bool set_preferred_target_page_bits(int bits)
 117{
 118    /* The target page size is the lowest common denominator for all
 119     * the CPUs in the system, so we can only make it smaller, never
 120     * larger. And we can't make it smaller once we've committed to
 121     * a particular size.
 122     */
 123#ifdef TARGET_PAGE_BITS_VARY
 124    assert(bits >= TARGET_PAGE_BITS_MIN);
 125    if (target_page_bits == 0 || target_page_bits > bits) {
 126        if (target_page_bits_decided) {
 127            return false;
 128        }
 129        target_page_bits = bits;
 130    }
 131#endif
 132    return true;
 133}
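/*
 * For illustration only (the bit widths below are made-up example values),
 * the shrink-only rule above plays out like this:
 *
 *     set_preferred_target_page_bits(12);   -> true,  target_page_bits = 12
 *     set_preferred_target_page_bits(10);   -> true,  shrunk to 10
 *     finalize_target_page_bits();          -> size is now committed
 *     set_preferred_target_page_bits(9);    -> false, already decided
 */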
 134
 135#if !defined(CONFIG_USER_ONLY)
 136
 137static void finalize_target_page_bits(void)
 138{
 139#ifdef TARGET_PAGE_BITS_VARY
 140    if (target_page_bits == 0) {
 141        target_page_bits = TARGET_PAGE_BITS_MIN;
 142    }
 143    target_page_bits_decided = true;
 144#endif
 145}
 146
 147typedef struct PhysPageEntry PhysPageEntry;
 148
 149struct PhysPageEntry {
  150    /* How many bits to skip to get to the next level (in units of P_L2_SIZE). 0 for a leaf. */
  151    uint32_t skip : 6;
  152    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 153    uint32_t ptr : 26;
 154};
 155
 156#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 157
 158/* Size of the L2 (and L3, etc) page tables.  */
 159#define ADDR_SPACE_BITS 64
 160
 161#define P_L2_BITS 9
 162#define P_L2_SIZE (1 << P_L2_BITS)
 163
 164#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
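/*
 * Worked example (assuming TARGET_PAGE_BITS = 12, chosen only for
 * illustration): the table must resolve 64 - 12 = 52 address bits, so
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 levels of P_L2_BITS = 9 bits
 * each, and 6 * 9 = 54 >= 52 bits are available.
 */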
 165
 166typedef PhysPageEntry Node[P_L2_SIZE];
 167
 168typedef struct PhysPageMap {
 169    struct rcu_head rcu;
 170
 171    unsigned sections_nb;
 172    unsigned sections_nb_alloc;
 173    unsigned nodes_nb;
 174    unsigned nodes_nb_alloc;
 175    Node *nodes;
 176    MemoryRegionSection *sections;
 177} PhysPageMap;
 178
 179struct AddressSpaceDispatch {
 180    struct rcu_head rcu;
 181
 182    MemoryRegionSection *mru_section;
 183    /* This is a multi-level map on the physical address space.
 184     * The bottom level has pointers to MemoryRegionSections.
 185     */
 186    PhysPageEntry phys_map;
 187    PhysPageMap map;
 188    AddressSpace *as;
 189};
 190
 191#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 192typedef struct subpage_t {
 193    MemoryRegion iomem;
 194    AddressSpace *as;
 195    hwaddr base;
 196    uint16_t sub_section[];
 197} subpage_t;
 198
 199#define PHYS_SECTION_UNASSIGNED 0
 200#define PHYS_SECTION_NOTDIRTY 1
 201#define PHYS_SECTION_ROM 2
 202#define PHYS_SECTION_WATCH 3
 203
 204static void io_mem_init(void);
 205static void memory_map_init(void);
 206static void tcg_commit(MemoryListener *listener);
 207
 208static MemoryRegion io_mem_watch;
 209
 210/**
 211 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 212 * @cpu: the CPU whose AddressSpace this is
 213 * @as: the AddressSpace itself
 214 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 215 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 216 */
 217struct CPUAddressSpace {
 218    CPUState *cpu;
 219    AddressSpace *as;
 220    struct AddressSpaceDispatch *memory_dispatch;
 221    MemoryListener tcg_as_listener;
 222};
 223
 224#endif
 225
 226#if !defined(CONFIG_USER_ONLY)
 227
 228static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 229{
 230    static unsigned alloc_hint = 16;
 231    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 232        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
 233        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 234        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 235        alloc_hint = map->nodes_nb_alloc;
 236    }
 237}
 238
 239static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 240{
 241    unsigned i;
 242    uint32_t ret;
 243    PhysPageEntry e;
 244    PhysPageEntry *p;
 245
 246    ret = map->nodes_nb++;
 247    p = map->nodes[ret];
 248    assert(ret != PHYS_MAP_NODE_NIL);
 249    assert(ret != map->nodes_nb_alloc);
 250
 251    e.skip = leaf ? 0 : 1;
 252    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 253    for (i = 0; i < P_L2_SIZE; ++i) {
 254        memcpy(&p[i], &e, sizeof(e));
 255    }
 256    return ret;
 257}
 258
 259static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 260                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 261                                int level)
 262{
 263    PhysPageEntry *p;
 264    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 265
 266    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 267        lp->ptr = phys_map_node_alloc(map, level == 0);
 268    }
 269    p = map->nodes[lp->ptr];
 270    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 271
 272    while (*nb && lp < &p[P_L2_SIZE]) {
 273        if ((*index & (step - 1)) == 0 && *nb >= step) {
 274            lp->skip = 0;
 275            lp->ptr = leaf;
 276            *index += step;
 277            *nb -= step;
 278        } else {
 279            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 280        }
 281        ++lp;
 282    }
 283}
 284
 285static void phys_page_set(AddressSpaceDispatch *d,
 286                          hwaddr index, hwaddr nb,
 287                          uint16_t leaf)
 288{
 289    /* Wildly overreserve - it doesn't matter much. */
 290    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 291
 292    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 293}
 294
  295/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 296 * and update our entry so we can skip it and go directly to the destination.
 297 */
 298static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
 299{
 300    unsigned valid_ptr = P_L2_SIZE;
 301    int valid = 0;
 302    PhysPageEntry *p;
 303    int i;
 304
 305    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 306        return;
 307    }
 308
 309    p = nodes[lp->ptr];
 310    for (i = 0; i < P_L2_SIZE; i++) {
 311        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 312            continue;
 313        }
 314
 315        valid_ptr = i;
 316        valid++;
 317        if (p[i].skip) {
 318            phys_page_compact(&p[i], nodes);
 319        }
 320    }
 321
 322    /* We can only compress if there's only one child. */
 323    if (valid != 1) {
 324        return;
 325    }
 326
 327    assert(valid_ptr < P_L2_SIZE);
 328
 329    /* Don't compress if it won't fit in the # of bits we have. */
 330    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 331        return;
 332    }
 333
 334    lp->ptr = p[valid_ptr].ptr;
 335    if (!p[valid_ptr].skip) {
 336        /* If our only child is a leaf, make this a leaf. */
 337        /* By design, we should have made this node a leaf to begin with so we
 338         * should never reach here.
 339         * But since it's so simple to handle this, let's do it just in case we
 340         * change this rule.
 341         */
 342        lp->skip = 0;
 343    } else {
 344        lp->skip += p[valid_ptr].skip;
 345    }
 346}
 347
 348static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 349{
 350    if (d->phys_map.skip) {
 351        phys_page_compact(&d->phys_map, d->map.nodes);
 352    }
 353}
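/*
 * Sketch of what compaction buys us (the indices are made up): if the root
 * entry has skip = 1 and its node holds a single valid child that in turn
 * has skip = 1, the two hops collapse into one entry with skip = 2, and
 * phys_page_find() consumes 2 * P_L2_BITS index bits in a single step
 * instead of loading the intermediate node.
 */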
 354
 355static inline bool section_covers_addr(const MemoryRegionSection *section,
 356                                       hwaddr addr)
 357{
 358    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 359     * the section must cover the entire address space.
 360     */
 361    return int128_gethi(section->size) ||
 362           range_covers_byte(section->offset_within_address_space,
 363                             int128_getlo(section->size), addr);
 364}
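/*
 * Example with made-up numbers: a section with
 * offset_within_address_space = 0x1000 and size = 0x2000 covers
 * 0x1000..0x2fff, so section_covers_addr() is true for 0x2fff and false for
 * 0x3000.  A section whose Int128 size has a non-zero high word (at least
 * 2^64 bytes) covers every address, which is the special case tested first.
 */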
 365
 366static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 367                                           Node *nodes, MemoryRegionSection *sections)
 368{
 369    PhysPageEntry *p;
 370    hwaddr index = addr >> TARGET_PAGE_BITS;
 371    int i;
 372
 373    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 374        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 375            return &sections[PHYS_SECTION_UNASSIGNED];
 376        }
 377        p = nodes[lp.ptr];
 378        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 379    }
 380
 381    if (section_covers_addr(&sections[lp.ptr], addr)) {
 382        return &sections[lp.ptr];
 383    } else {
 384        return &sections[PHYS_SECTION_UNASSIGNED];
 385    }
 386}
 387
 388bool memory_region_is_unassigned(MemoryRegion *mr)
 389{
 390    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 391        && mr != &io_mem_watch;
 392}
 393
 394/* Called from RCU critical section */
 395static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 396                                                        hwaddr addr,
 397                                                        bool resolve_subpage)
 398{
 399    MemoryRegionSection *section = atomic_read(&d->mru_section);
 400    subpage_t *subpage;
 401    bool update;
 402
 403    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 404        section_covers_addr(section, addr)) {
 405        update = false;
 406    } else {
 407        section = phys_page_find(d->phys_map, addr, d->map.nodes,
 408                                 d->map.sections);
 409        update = true;
 410    }
 411    if (resolve_subpage && section->mr->subpage) {
 412        subpage = container_of(section->mr, subpage_t, iomem);
 413        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 414    }
 415    if (update) {
 416        atomic_set(&d->mru_section, section);
 417    }
 418    return section;
 419}
 420
 421/* Called from RCU critical section */
 422static MemoryRegionSection *
 423address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 424                                 hwaddr *plen, bool resolve_subpage)
 425{
 426    MemoryRegionSection *section;
 427    MemoryRegion *mr;
 428    Int128 diff;
 429
 430    section = address_space_lookup_region(d, addr, resolve_subpage);
 431    /* Compute offset within MemoryRegionSection */
 432    addr -= section->offset_within_address_space;
 433
 434    /* Compute offset within MemoryRegion */
 435    *xlat = addr + section->offset_within_region;
 436
 437    mr = section->mr;
 438
 439    /* MMIO registers can be expected to perform full-width accesses based only
 440     * on their address, without considering adjacent registers that could
 441     * decode to completely different MemoryRegions.  When such registers
 442     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 443     * regions overlap wildly.  For this reason we cannot clamp the accesses
 444     * here.
 445     *
 446     * If the length is small (as is the case for address_space_ldl/stl),
 447     * everything works fine.  If the incoming length is large, however,
 448     * the caller really has to do the clamping through memory_access_size.
 449     */
 450    if (memory_region_is_ram(mr)) {
 451        diff = int128_sub(section->size, int128_make64(addr));
 452        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 453    }
 454    return section;
 455}
 456
 457MemoryRegion *address_space_translate_attr(AddressSpace *as, hwaddr addr,
 458                                           hwaddr *xlat, hwaddr *plen,
 459                                           bool is_write,
 460                                           MemTxAttrs *attr)
 461{
 462    IOMMUTLBEntry iotlb;
 463    MemoryRegionSection *section;
 464    MemoryRegion *mr;
 465
 466    for (;;) {
 467        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 468        section = address_space_translate_internal(d, addr, &addr, plen, true);
 469        mr = section->mr;
 470
 471        if (!mr->iommu_ops) {
 472            break;
 473        }
 474
 475        if (mr->iommu_ops->translate_attr) {
 476            iotlb = mr->iommu_ops->translate_attr(mr, addr, is_write, attr);
 477        } else {
 478            iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 479        }
 480
 481        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 482                | (addr & iotlb.addr_mask));
 483        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 484        if (!(iotlb.perm & (1 << is_write))) {
 485            mr = &io_mem_unassigned;
 486            break;
 487        }
 488
 489        as = iotlb.target_as;
 490    }
 491
 492    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 493        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 494        *plen = MIN(page, *plen);
 495    }
 496
 497    *xlat = addr;
 498    return mr;
 499}
 500
 501MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 502                                      hwaddr *xlat, hwaddr *plen,
 503                                      bool is_write)
 504{
 505    MemTxAttrs attr = MEMTXATTRS_UNSPECIFIED;
 506    return address_space_translate_attr(as, addr, xlat, plen, is_write,
 507                                        &attr);
 508}
 509
 510/* Called from RCU critical section */
 511MemoryRegionSection *
 512address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 513                                  hwaddr *xlat, hwaddr *plen, int *prot,
 514                                  MemTxAttrs *attr)
 515{
 516    MemoryRegionSection *section;
 517    AddressSpace *as = cpu->cpu_ases[asidx].memory_dispatch->as;
 518
 519    IOMMUTLBEntry iotlb;
 520    struct {
 521        MemoryRegionSection *section;
 522        hwaddr addr;
 523        hwaddr len;
 524    } root =  { .section = NULL, .addr = addr};
 525    AddressSpace *orig_as = as;
 526    MemoryRegion *mr;
 527    hwaddr len = *plen;
 528
 529    assert(prot);
 530
 531    for (;;) {
 532        /* Xilinx: Use the Address Space dispatch for the XMPU */
 533        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 534        section = address_space_translate_internal(d, addr, &addr, plen, false);
 535        mr = section->mr;
 536
 537        if (!mr->iommu_ops) {
 538            break;
 539        }
 540
 541        /* FIXME: these are not necessarily accesses, so is_write doesn't make
 542           sense!  */
 543        if (mr->iommu_ops->translate_attr) {
 544            iotlb = mr->iommu_ops->translate_attr(mr, addr, false, attr);
 545        } else {
 546            iotlb = mr->iommu_ops->translate(mr, addr, false);
 547        }
 548        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 549                | (addr & iotlb.addr_mask));
 550        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
 551        as = iotlb.target_as;
 552
 553        if (!root.section && orig_as != as) {
 554            root.section = section;
 555            root.len = *plen;
 556        }
 557    }
 558
 559    *plen = len;
 560    *xlat = addr;
 561
 562    /* If the IOMMU translated addr into IO in a different AS, refer to
 563     * the IOMMU itself and do a slow translated access at access time.
 564     * TODO: If the iotlb could record dst AS, this wouldn't be needed.
 565     */
 566    if (!memory_region_is_ram(section->mr) && as != orig_as) {
 567        *plen = root.len;
 568        *xlat = root.addr;
 569        section = root.section;
 570    }
 571//    qemu_log("as=%p mr=%p addr=%lx len=%lx\n", as, section->mr, *xlat, *plen);
 572    return section;
 573}
 574#endif
 575
 576#if !defined(CONFIG_USER_ONLY)
 577
 578static int cpu_common_post_load(void *opaque, int version_id)
 579{
 580    CPUState *cpu = opaque;
 581
 582    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 583       version_id is increased. */
 584    cpu->interrupt_request &= ~0x01;
 585    tlb_flush(cpu, 1);
 586
 587    return 0;
 588}
 589
 590static int cpu_common_pre_load(void *opaque)
 591{
 592    CPUState *cpu = opaque;
 593
 594    cpu->exception_index = -1;
 595
 596    return 0;
 597}
 598
 599static bool cpu_common_exception_index_needed(void *opaque)
 600{
 601    CPUState *cpu = opaque;
 602
 603    return tcg_enabled() && cpu->exception_index != -1;
 604}
 605
 606static const VMStateDescription vmstate_cpu_common_exception_index = {
 607    .name = "cpu_common/exception_index",
 608    .version_id = 1,
 609    .minimum_version_id = 1,
 610    .needed = cpu_common_exception_index_needed,
 611    .fields = (VMStateField[]) {
 612        VMSTATE_INT32(exception_index, CPUState),
 613        VMSTATE_END_OF_LIST()
 614    }
 615};
 616
 617static bool cpu_common_crash_occurred_needed(void *opaque)
 618{
 619    CPUState *cpu = opaque;
 620
 621    return cpu->crash_occurred;
 622}
 623
 624static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 625    .name = "cpu_common/crash_occurred",
 626    .version_id = 1,
 627    .minimum_version_id = 1,
 628    .needed = cpu_common_crash_occurred_needed,
 629    .fields = (VMStateField[]) {
 630        VMSTATE_BOOL(crash_occurred, CPUState),
 631        VMSTATE_END_OF_LIST()
 632    }
 633};
 634
 635const VMStateDescription vmstate_cpu_common = {
 636    .name = "cpu_common",
 637    .version_id = 1,
 638    .minimum_version_id = 1,
 639    .pre_load = cpu_common_pre_load,
 640    .post_load = cpu_common_post_load,
 641    .fields = (VMStateField[]) {
 642        VMSTATE_UINT32(halted, CPUState),
 643        VMSTATE_UINT32(interrupt_request, CPUState),
 644        VMSTATE_END_OF_LIST()
 645    },
 646    .subsections = (const VMStateDescription*[]) {
 647        &vmstate_cpu_common_exception_index,
 648        &vmstate_cpu_common_crash_occurred,
 649        NULL
 650    }
 651};
 652
 653#endif
 654
 655CPUState *qemu_get_cpu(int index)
 656{
 657    CPUState *cpu;
 658
 659    CPU_FOREACH(cpu) {
 660        if (cpu->cpu_index == index) {
 661            return cpu;
 662        }
 663    }
 664
 665    return NULL;
 666}
 667
 668#if !defined(CONFIG_USER_ONLY)
 669void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 670{
 671    CPUAddressSpace *newas;
 672
 673    /* Target code should have set num_ases before calling us */
 674    assert(asidx < cpu->num_ases);
 675
 676    if (asidx == 0) {
 677        /* address space 0 gets the convenience alias */
 678        cpu->as = as;
 679    }
 680
 681    /* KVM cannot currently support multiple address spaces. */
 682    assert(asidx == 0 || !kvm_enabled());
 683
 684    if (!cpu->cpu_ases) {
 685        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 686    }
 687
 688    newas = &cpu->cpu_ases[asidx];
 689    newas->cpu = cpu;
 690    newas->as = as;
 691    if (tcg_enabled()) {
 692        newas->tcg_as_listener.commit = tcg_commit;
 693        memory_listener_register(&newas->tcg_as_listener, as);
 694    }
 695}
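/*
 * Minimal usage sketch (hypothetical target code; the AddressSpace pointers
 * are assumed to have been created by the target beforehand):
 *
 *     cpu->num_ases = 2;
 *     cpu_address_space_init(cpu, secure_as, 0);     // also sets cpu->as
 *     cpu_address_space_init(cpu, nonsecure_as, 1);  // not allowed with KVM
 */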
 696
 697AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 698{
 699    /* Return the AddressSpace corresponding to the specified index */
 700    return cpu->cpu_ases[asidx].as;
 701}
 702#endif
 703
 704void cpu_exec_unrealizefn(CPUState *cpu)
 705{
 706    CPUClass *cc = CPU_GET_CLASS(cpu);
 707
 708    cpu_list_remove(cpu);
 709
 710    if (cc->vmsd != NULL) {
 711        vmstate_unregister(NULL, cc->vmsd, cpu);
 712    }
 713    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 714        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
 715    }
 716}
 717
 718void cpu_exec_initfn(CPUState *cpu)
 719{
 720    cpu->as = NULL;
 721    cpu->num_ases = 0;
 722
 723#ifndef CONFIG_USER_ONLY
 724    cpu->thread_id = qemu_get_thread_id();
 725
 726    /* This is a softmmu CPU object, so create a property for it
 727     * so users can wire up its memory. (This can't go in qom/cpu.c
 728     * because that file is compiled only once for both user-mode
 729     * and system builds.) The default if no link is set up is to use
 730     * the system address space.
 731     */
 732    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 733                             (Object **)&cpu->memory,
 734                             qdev_prop_allow_set_link_before_realize,
 735                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
 736                             &error_abort);
 737    cpu->memory = system_memory;
 738    object_ref(OBJECT(cpu->memory));
 739#endif
 740}
 741
 742void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 743{
 744    CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
 745
 746    cpu_list_add(cpu);
 747
 748#ifndef CONFIG_USER_ONLY
 749    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 750        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
 751    }
 752    if (cc->vmsd != NULL) {
 753        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
 754    }
 755#endif
 756}
 757
 758static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 759{
 760    /* Flush the whole TB as this will not have race conditions
 761     * even if we don't have proper locking yet.
 762     * Ideally we would just invalidate the TBs for the
 763     * specified PC.
 764     */
 765    tb_flush(cpu);
 766}
 767
 768#if defined(CONFIG_USER_ONLY)
 769void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 770
 771{
 772}
 773
 774int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 775                          int flags)
 776{
 777    return -ENOSYS;
 778}
 779
 780void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 781{
 782}
 783
 784int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 785                          int flags, CPUWatchpoint **watchpoint)
 786{
 787    return -ENOSYS;
 788}
 789#else
 790/* Add a watchpoint.  */
 791int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 792                          int flags, CPUWatchpoint **watchpoint)
 793{
 794    CPUWatchpoint *wp;
 795
 796    /* forbid ranges which are empty or run off the end of the address space */
 797    if (len == 0 || (addr + len - 1) < addr) {
 798        error_report("tried to set invalid watchpoint at %"
 799                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 800        return -EINVAL;
 801    }
 802    wp = g_malloc(sizeof(*wp));
 803
 804    wp->vaddr = addr;
 805    wp->len = len;
 806    wp->flags = flags;
 807
 808    /* keep all GDB-injected watchpoints in front */
 809    if (flags & BP_GDB) {
 810        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 811    } else {
 812        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 813    }
 814
 815    tlb_flush_page(cpu, addr);
 816
 817    if (watchpoint)
 818        *watchpoint = wp;
 819    return 0;
 820}
 821
 822/* Remove a specific watchpoint.  */
 823int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 824                          int flags)
 825{
 826    CPUWatchpoint *wp;
 827
 828    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 829        if (addr == wp->vaddr && len == wp->len
 830                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 831            cpu_watchpoint_remove_by_ref(cpu, wp);
 832            return 0;
 833        }
 834    }
 835    return -ENOENT;
 836}
 837
 838/* Remove a specific watchpoint by reference.  */
 839void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 840{
 841    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 842
 843    tlb_flush_page(cpu, watchpoint->vaddr);
 844
 845    g_free(watchpoint);
 846}
 847
 848/* Remove all matching watchpoints.  */
 849void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 850{
 851    CPUWatchpoint *wp, *next;
 852
 853    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 854        if (wp->flags & mask) {
 855            cpu_watchpoint_remove_by_ref(cpu, wp);
 856        }
 857    }
 858}
 859
 860/* Return true if this watchpoint address matches the specified
  861 * access (i.e. the address range covered by the watchpoint overlaps
 862 * partially or completely with the address range covered by the
 863 * access).
 864 */
 865static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 866                                                  vaddr addr,
 867                                                  vaddr len)
 868{
 869    /* We know the lengths are non-zero, but a little caution is
 870     * required to avoid errors in the case where the range ends
 871     * exactly at the top of the address space and so addr + len
 872     * wraps round to zero.
 873     */
 874    vaddr wpend = wp->vaddr + wp->len - 1;
 875    vaddr addrend = addr + len - 1;
 876
 877    return !(addr > wpend || wp->vaddr > addrend);
 878}
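/*
 * Example of the wrap-around caution above (illustrative 64-bit values): a
 * watchpoint at vaddr = 0xfffffffffffff000 with len = 0x1000 has
 * wpend = 0xffffffffffffffff; computing vaddr + len directly would wrap to 0
 * and break the comparison, which is why inclusive end addresses are used.
 */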
 879
 880#endif
 881
 882/* Add a breakpoint.  */
 883int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 884                          CPUBreakpoint **breakpoint)
 885{
 886    CPUBreakpoint *bp;
 887
 888    bp = g_malloc(sizeof(*bp));
 889
 890    bp->pc = pc;
 891    bp->flags = flags;
 892
 893    /* keep all GDB-injected breakpoints in front */
 894    if (flags & BP_GDB) {
 895        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 896    } else {
 897        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 898    }
 899
 900    breakpoint_invalidate(cpu, pc);
 901
 902    if (breakpoint) {
 903        *breakpoint = bp;
 904    }
 905    return 0;
 906}
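/*
 * Typical call sequence (a sketch of how a debugger front end such as the
 * gdbstub might use this API; error handling elided):
 *
 *     CPUBreakpoint *bp;
 *     cpu_breakpoint_insert(cpu, pc, BP_GDB, &bp);
 *     ...
 *     cpu_breakpoint_remove_by_ref(cpu, bp);
 */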
 907
 908/* Remove a specific breakpoint.  */
 909int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 910{
 911    CPUBreakpoint *bp;
 912
 913    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 914        if (bp->pc == pc && bp->flags == flags) {
 915            cpu_breakpoint_remove_by_ref(cpu, bp);
 916            return 0;
 917        }
 918    }
 919    return -ENOENT;
 920}
 921
 922/* Remove a specific breakpoint by reference.  */
 923void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 924{
 925    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 926
 927    breakpoint_invalidate(cpu, breakpoint->pc);
 928
 929    g_free(breakpoint);
 930}
 931
 932/* Remove all matching breakpoints. */
 933void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 934{
 935    CPUBreakpoint *bp, *next;
 936
 937    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 938        if (bp->flags & mask) {
 939            cpu_breakpoint_remove_by_ref(cpu, bp);
 940        }
 941    }
 942}
 943
  944/* Enable or disable single-step mode.  EXCP_DEBUG is returned by the
 945   CPU loop after each instruction */
 946void cpu_single_step(CPUState *cpu, int enabled)
 947{
 948    if (cpu->singlestep_enabled != enabled) {
 949        cpu->singlestep_enabled = enabled;
 950        if (kvm_enabled()) {
 951            kvm_update_guest_debug(cpu, 0);
 952        } else {
 953            /* must flush all the translated code to avoid inconsistencies */
 954            /* XXX: only flush what is necessary */
 955            tb_flush(cpu);
 956        }
 957    }
 958}
 959
 960void cpu_abort(CPUState *cpu, const char *fmt, ...)
 961{
 962    va_list ap;
 963    va_list ap2;
 964
 965    va_start(ap, fmt);
 966    va_copy(ap2, ap);
 967    fprintf(stderr, "qemu: fatal: ");
 968    vfprintf(stderr, fmt, ap);
 969    fprintf(stderr, "\n");
 970    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 971    if (qemu_log_separate()) {
 972        qemu_log_lock();
 973        qemu_log("qemu: fatal: ");
 974        qemu_log_vprintf(fmt, ap2);
 975        qemu_log("\n");
 976        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 977        qemu_log_flush();
 978        qemu_log_unlock();
 979        qemu_log_close();
 980    }
 981    va_end(ap2);
 982    va_end(ap);
 983    replay_finish();
 984#if defined(CONFIG_USER_ONLY)
 985    {
 986        struct sigaction act;
 987        sigfillset(&act.sa_mask);
 988        act.sa_handler = SIG_DFL;
 989        sigaction(SIGABRT, &act, NULL);
 990    }
 991#endif
 992    abort();
 993}
 994
 995#if !defined(CONFIG_USER_ONLY)
 996/* Called from RCU critical section */
 997static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 998{
 999    RAMBlock *block;
1000
1001    block = atomic_rcu_read(&ram_list.mru_block);
1002    if (block && addr - block->offset < block->max_length) {
1003        return block;
1004    }
1005    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1006        if (addr - block->offset < block->max_length) {
1007            goto found;
1008        }
1009    }
1010
1011    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1012    abort();
1013
1014found:
1015    /* It is safe to write mru_block outside the iothread lock.  This
1016     * is what happens:
1017     *
1018     *     mru_block = xxx
1019     *     rcu_read_unlock()
1020     *                                        xxx removed from list
1021     *                  rcu_read_lock()
1022     *                  read mru_block
1023     *                                        mru_block = NULL;
1024     *                                        call_rcu(reclaim_ramblock, xxx);
1025     *                  rcu_read_unlock()
1026     *
1027     * atomic_rcu_set is not needed here.  The block was already published
1028     * when it was placed into the list.  Here we're just making an extra
1029     * copy of the pointer.
1030     */
1031    ram_list.mru_block = block;
1032    return block;
1033}
1034
1035static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1036{
1037    CPUState *cpu;
1038    ram_addr_t start1;
1039    RAMBlock *block;
1040    ram_addr_t end;
1041
1042    end = TARGET_PAGE_ALIGN(start + length);
1043    start &= TARGET_PAGE_MASK;
1044
1045    rcu_read_lock();
1046    block = qemu_get_ram_block(start);
1047    assert(block == qemu_get_ram_block(end - 1));
1048    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1049    CPU_FOREACH(cpu) {
1050        tlb_reset_dirty(cpu, start1, length);
1051    }
1052    rcu_read_unlock();
1053}
1054
1055/* Note: start and end must be within the same ram block.  */
1056bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1057                                              ram_addr_t length,
1058                                              unsigned client)
1059{
1060    DirtyMemoryBlocks *blocks;
1061    unsigned long end, page;
1062    bool dirty = false;
1063
1064    if (length == 0) {
1065        return false;
1066    }
1067
1068    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1069    page = start >> TARGET_PAGE_BITS;
1070
1071    rcu_read_lock();
1072
1073    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1074
1075    while (page < end) {
1076        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1077        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1078        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1079
1080        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1081                                              offset, num);
1082        page += num;
1083    }
1084
1085    rcu_read_unlock();
1086
1087    if (dirty && tcg_enabled()) {
1088        tlb_reset_dirty_range_all(start, length);
1089    }
1090
1091    return dirty;
1092}
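/*
 * Illustration of the block-wise walk above, pretending for the example that
 * DIRTY_MEMORY_BLOCK_SIZE were 8 pages: clearing pages 5..12 touches block 0
 * at offsets 5..7 (num = 3) on the first iteration and block 1 at offsets
 * 0..4 (num = 5) on the second.
 */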
1093
1094/* Called from RCU critical section */
1095hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1096                                       MemoryRegionSection *section,
1097                                       target_ulong vaddr,
1098                                       hwaddr paddr, hwaddr xlat,
1099                                       int prot,
1100                                       target_ulong *address)
1101{
1102    hwaddr iotlb;
1103    CPUWatchpoint *wp;
1104
1105    if (memory_region_is_ram(section->mr)) {
1106        /* Normal RAM.  */
1107        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1108        if (!section->readonly) {
1109            iotlb |= PHYS_SECTION_NOTDIRTY;
1110        } else {
1111            iotlb |= PHYS_SECTION_ROM;
1112        }
1113    } else {
1114        AddressSpaceDispatch *d;
1115
1116        d = atomic_rcu_read(&section->address_space->dispatch);
1117        iotlb = section - d->map.sections;
1118        iotlb += xlat;
1119    }
1120
1121    /* Make accesses to pages with watchpoints go via the
1122       watchpoint trap routines.  */
1123    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1124        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1125            /* Avoid trapping reads of pages with a write breakpoint. */
1126            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1127                iotlb = PHYS_SECTION_WATCH + paddr;
1128                *address |= TLB_MMIO;
1129                break;
1130            }
1131        }
1132    }
1133
1134    return iotlb;
1135}
1136#endif /* defined(CONFIG_USER_ONLY) */
1137
1138#if !defined(CONFIG_USER_ONLY)
1139
1140static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1141                             uint16_t section);
1142static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1143
1144static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1145                               qemu_anon_ram_alloc;
1146
1147/*
 1148 * Set a custom physical guest memory allocator.
1149 * Accelerators with unusual needs may need this.  Hopefully, we can
1150 * get rid of it eventually.
1151 */
1152void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1153{
1154    phys_mem_alloc = alloc;
1155}
1156
1157static uint16_t phys_section_add(PhysPageMap *map,
1158                                 MemoryRegionSection *section)
1159{
1160    /* The physical section number is ORed with a page-aligned
1161     * pointer to produce the iotlb entries.  Thus it should
1162     * never overflow into the page-aligned value.
1163     */
1164    assert(map->sections_nb < TARGET_PAGE_SIZE);
1165
1166    if (map->sections_nb == map->sections_nb_alloc) {
1167        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1168        map->sections = g_renew(MemoryRegionSection, map->sections,
1169                                map->sections_nb_alloc);
1170    }
1171    map->sections[map->sections_nb] = *section;
1172    memory_region_ref(section->mr);
1173    return map->sections_nb++;
1174}
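/*
 * This assert is what lets memory_region_section_get_iotlb() above pack a
 * section number into the low bits of a page-aligned RAM address.  For
 * example (hypothetical numbers, TARGET_PAGE_SIZE = 4096): 0x40000000 ORed
 * with PHYS_SECTION_NOTDIRTY (1) gives 0x40000001, and the two parts can be
 * separated again because section numbers never reach TARGET_PAGE_SIZE.
 */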
1175
1176static void phys_section_destroy(MemoryRegion *mr)
1177{
1178    bool have_sub_page = mr->subpage;
1179
1180    memory_region_unref(mr);
1181
1182    if (have_sub_page) {
1183        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1184        object_unref(OBJECT(&subpage->iomem));
1185        g_free(subpage);
1186    }
1187}
1188
1189static void phys_sections_free(PhysPageMap *map)
1190{
1191    while (map->sections_nb > 0) {
1192        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1193        phys_section_destroy(section->mr);
1194    }
1195    g_free(map->sections);
1196    g_free(map->nodes);
1197}
1198
1199static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1200{
1201    subpage_t *subpage;
1202    hwaddr base = section->offset_within_address_space
1203        & TARGET_PAGE_MASK;
1204    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1205                                                   d->map.nodes, d->map.sections);
1206    MemoryRegionSection subsection = {
1207        .offset_within_address_space = base,
1208        .size = int128_make64(TARGET_PAGE_SIZE),
1209    };
1210    hwaddr start, end;
1211
1212    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1213
1214    if (!(existing->mr->subpage)) {
1215        subpage = subpage_init(d->as, base);
1216        subsection.address_space = d->as;
1217        subsection.mr = &subpage->iomem;
1218        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1219                      phys_section_add(&d->map, &subsection));
1220    } else {
1221        subpage = container_of(existing->mr, subpage_t, iomem);
1222    }
1223    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1224    end = start + int128_get64(section->size) - 1;
1225    subpage_register(subpage, start, end,
1226                     phys_section_add(&d->map, section));
1227}
1228
1229
1230static void register_multipage(AddressSpaceDispatch *d,
1231                               MemoryRegionSection *section)
1232{
1233    hwaddr start_addr = section->offset_within_address_space;
1234    uint16_t section_index = phys_section_add(&d->map, section);
1235    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1236                                                    TARGET_PAGE_BITS));
1237
1238    assert(num_pages);
1239    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1240}
1241
1242static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1243{
1244    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1245    AddressSpaceDispatch *d = as->next_dispatch;
1246    MemoryRegionSection now = *section, remain = *section;
1247    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1248
1249    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1250        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1251                       - now.offset_within_address_space;
1252
1253        now.size = int128_min(int128_make64(left), now.size);
1254        register_subpage(d, &now);
1255    } else {
1256        now.size = int128_zero();
1257    }
1258    while (int128_ne(remain.size, now.size)) {
1259        remain.size = int128_sub(remain.size, now.size);
1260        remain.offset_within_address_space += int128_get64(now.size);
1261        remain.offset_within_region += int128_get64(now.size);
1262        now = remain;
1263        if (int128_lt(remain.size, page_size)) {
1264            register_subpage(d, &now);
1265        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1266            now.size = page_size;
1267            register_subpage(d, &now);
1268        } else {
1269            now.size = int128_and(now.size, int128_neg(page_size));
1270            register_multipage(d, &now);
1271        }
1272    }
1273}
1274
1275void qemu_flush_coalesced_mmio_buffer(void)
1276{
1277    if (kvm_enabled())
1278        kvm_flush_coalesced_mmio_buffer();
1279}
1280
1281void qemu_mutex_lock_ramlist(void)
1282{
1283    qemu_mutex_lock(&ram_list.mutex);
1284}
1285
1286void qemu_mutex_unlock_ramlist(void)
1287{
1288    qemu_mutex_unlock(&ram_list.mutex);
1289}
1290
1291static int64_t get_file_size(int fd)
1292{
1293    int64_t size = lseek(fd, 0, SEEK_END);
1294    if (size < 0) {
1295        return -errno;
1296    }
1297    return size;
1298}
1299
1300static void *file_ram_alloc(RAMBlock *block,
1301                            ram_addr_t memory,
1302                            const char *path,
1303                            Error **errp)
1304{
1305    bool unlink_on_error = false;
1306    char *filename;
1307    char *sanitized_name;
1308    char *c;
1309#ifdef _WIN32
1310    void *area;
1311#else
1312    void *area = MAP_FAILED;
1313#endif
1314    int fd = -1;
1315    int64_t file_size;
1316
1317
1318    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1319        error_setg(errp,
1320                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1321        return NULL;
1322    }
1323
1324    for (;;) {
1325        fd = open(path, O_RDWR);
1326        if (fd >= 0) {
1327            /* @path names an existing file, use it */
1328            break;
1329        }
1330        if (errno == ENOENT) {
1331            /* @path names a file that doesn't exist, create it */
1332            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1333            if (fd >= 0) {
1334                unlink_on_error = true;
1335                break;
1336            }
1337        } else if (errno == EISDIR) {
1338            /* @path names a directory, create a file there */
1339            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1340            sanitized_name = g_strdup(memory_region_name(block->mr));
1341            for (c = sanitized_name; *c != '\0'; c++) {
1342                if (*c == '/') {
1343                    *c = '_';
1344                }
1345            }
1346
1347            filename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "" \
1348                                       "qemu_back_mem.%s.XXXXXX", path,
1349                                       sanitized_name);
1350            g_free(sanitized_name);
1351
1352#ifdef _WIN32
1353            fd = _open(_mktemp(filename), _O_CREAT | _O_RDWR);
1354#else
1355            fd = mkstemp(filename);
1356#endif
1357            if (fd >= 0) {
1358                unlink(filename);
1359                g_free(filename);
1360                break;
1361            }
1362            g_free(filename);
1363        }
1364        if (errno != EEXIST && errno != EINTR) {
1365            error_setg_errno(errp, errno,
1366                             "can't open backing store %s for guest RAM",
1367                             path);
1368            goto error;
1369        }
1370        /*
1371         * Try again on EINTR and EEXIST.  The latter happens when
1372         * something else creates the file between our two open().
1373         */
1374    }
1375
1376#ifdef _WIN32
1377    SYSTEM_INFO SysInfo;
1378    GetSystemInfo(&SysInfo);
1379    block->page_size = SysInfo.dwPageSize;
1380#else
1381    block->page_size = qemu_fd_getpagesize(fd);
1382#endif
1383    block->mr->align = block->page_size;
1384
1385#if defined(__s390x__)
1386    if (kvm_enabled()) {
1387        block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1388    }
1389#endif
1390
1391    file_size = get_file_size(fd);
1392
1393    if (memory < block->page_size) {
1394        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1395                   "or larger than page size 0x%zx",
1396                   memory, block->page_size);
1397        goto error;
1398    }
1399
1400    if (file_size > 0 && file_size < memory) {
1401        error_setg(errp, "backing store %s size 0x%" PRIx64
1402                   " does not match 'size' option 0x" RAM_ADDR_FMT,
1403                   path, file_size, memory);
1404        goto error;
1405    }
1406
1407    memory = ROUND_UP(memory, block->page_size);
1408
1409    /*
1410     * ftruncate is not supported by hugetlbfs in older
1411     * hosts, so don't bother bailing out on errors.
1412     * If anything goes wrong with it under other filesystems,
1413     * mmap will fail.
1414     *
1415     * Do not truncate the non-empty backend file to avoid corrupting
1416     * the existing data in the file. Disabling shrinking is not
1417     * enough. For example, the current vNVDIMM implementation stores
1418     * the guest NVDIMM labels at the end of the backend file. If the
1419     * backend file is later extended, QEMU will not be able to find
1420     * those labels. Therefore, extending the non-empty backend file
1421     * is disabled as well.
1422     */
1423    if (!file_size && ftruncate(fd, memory)) {
1424        perror("ftruncate");
1425    }
1426
1427#ifdef _WIN32
1428    HANDLE fd_temp = (HANDLE)_get_osfhandle(fd);
1429    HANDLE hMapFile = CreateFileMapping(fd_temp, NULL, PAGE_READWRITE,
1430                                        0, memory, NULL);
1431    area = MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, 0);
1432    if (area == NULL) {
1433#else
1434    area = qemu_ram_mmap(fd, memory, block->mr->align,
1435                         block->flags & RAM_SHARED);
1436    if (area == MAP_FAILED) {
1437#endif
1438        error_setg_errno(errp, errno,
1439                         "unable to map backing store for guest RAM");
1440        goto error;
1441    }
1442
1443    if (mem_prealloc) {
1444        os_mem_prealloc(fd, area, memory, errp);
1445        if (errp && *errp) {
1446            goto error;
1447        }
1448    }
1449
1450    block->fd = fd;
1451    return area;
1452
1453error:
1454#ifndef _WIN32
1455    if (area != MAP_FAILED) {
1456        qemu_ram_munmap(area, memory);
1457    }
1458#endif
1459    if (unlink_on_error) {
1460        unlink(path);
1461    }
1462    if (fd != -1) {
1463        close(fd);
1464    }
1465    return NULL;
1466}
1467
1468/* Called with the ramlist lock held.  */
1469static ram_addr_t find_ram_offset(ram_addr_t size)
1470{
1471    RAMBlock *block, *next_block;
1472    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1473
 1474    assert(size != 0); /* it would hand out the same offset multiple times */
1475
1476    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1477        return 0;
1478    }
1479
1480    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1481        ram_addr_t end, next = RAM_ADDR_MAX;
1482
1483        end = block->offset + block->max_length;
1484
1485        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1486            if (next_block->offset >= end) {
1487                next = MIN(next, next_block->offset);
1488            }
1489        }
1490        if (next - end >= size && next - end < mingap) {
1491            offset = end;
1492            mingap = next - end;
1493        }
1494    }
1495
1496    if (offset == RAM_ADDR_MAX) {
1497        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1498                (uint64_t)size);
1499        abort();
1500    }
1501
1502    return offset;
1503}
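/*
 * Example of the best-fit search above (made-up layout): with blocks at
 * [0x00000000, 0x10000000) and [0x40000000, 0x50000000), a request for
 * 0x08000000 bytes looks at the 0x30000000-byte gap between the blocks and
 * at the unbounded gap after the last block, and returns offset 0x10000000
 * because that is the smallest gap that still fits the request.
 */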
1504
1505ram_addr_t last_ram_offset(void)
1506{
1507    RAMBlock *block;
1508    ram_addr_t last = 0;
1509
1510    rcu_read_lock();
1511    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1512        last = MAX(last, block->offset + block->max_length);
1513    }
1514    rcu_read_unlock();
1515    return last;
1516}
1517
1518static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1519{
1520    int ret;
1521
 1522    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1523    if (!machine_dump_guest_core(current_machine)) {
1524        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1525        if (ret) {
1526            perror("qemu_madvise");
1527            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1528                            "but dump_guest_core=off specified\n");
1529        }
1530    }
1531}
1532
1533const char *qemu_ram_get_idstr(RAMBlock *rb)
1534{
1535    return rb->idstr;
1536}
1537
1538/* Called with iothread lock held.  */
1539void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1540{
1541    RAMBlock *block;
1542
1543    assert(new_block);
1544    assert(!new_block->idstr[0]);
1545
1546    if (dev) {
1547        char *id = qdev_get_dev_path(dev);
1548        if (id) {
1549            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1550            g_free(id);
1551        }
1552    }
1553    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1554
1555    rcu_read_lock();
1556    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1557        if (block != new_block &&
1558            !strcmp(block->idstr, new_block->idstr)) {
1559            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1560                    new_block->idstr);
1561            abort();
1562        }
1563    }
1564    rcu_read_unlock();
1565}
1566
1567/* Called with iothread lock held.  */
1568void qemu_ram_unset_idstr(RAMBlock *block)
1569{
1570    /* FIXME: arch_init.c assumes that this is not called throughout
1571     * migration.  Ignore the problem since hot-unplug during migration
1572     * does not work anyway.
1573     */
1574    if (block) {
1575        memset(block->idstr, 0, sizeof(block->idstr));
1576    }
1577}
1578
1579size_t qemu_ram_pagesize(RAMBlock *rb)
1580{
1581    return rb->page_size;
1582}
1583
1584static int memory_try_enable_merging(void *addr, size_t len)
1585{
1586    if (!machine_mem_merge(current_machine)) {
1587        /* disabled by the user */
1588        return 0;
1589    }
1590
1591    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1592}
1593
 1594/* Only legal before the guest might have detected the memory size: e.g. on
 1595 * incoming migration, or right after reset.
 1596 *
 1597 * As the memory core doesn't know how the memory is accessed, it is up to
 1598 * the resize callback to update device state and/or add assertions to detect
 1599 * misuse, if necessary.
1600 */
1601int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1602{
1603    assert(block);
1604
1605    newsize = HOST_PAGE_ALIGN(newsize);
1606
1607    if (block->used_length == newsize) {
1608        return 0;
1609    }
1610
1611    if (!(block->flags & RAM_RESIZEABLE)) {
1612        error_setg_errno(errp, EINVAL,
1613                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1614                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1615                         newsize, block->used_length);
1616        return -EINVAL;
1617    }
1618
1619    if (block->max_length < newsize) {
1620        error_setg_errno(errp, EINVAL,
1621                         "Length too large: %s: 0x" RAM_ADDR_FMT
1622                         " > 0x" RAM_ADDR_FMT, block->idstr,
1623                         newsize, block->max_length);
1624        return -EINVAL;
1625    }
1626
1627    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1628    block->used_length = newsize;
1629    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1630                                        DIRTY_CLIENTS_ALL);
1631    memory_region_set_size(block->mr, newsize);
1632    if (block->resized) {
1633        block->resized(block->idstr, newsize, block->host);
1634    }
1635    return 0;
1636}
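/*
 * Minimal usage sketch (hypothetical caller; assumes the block was created
 * with the RAM_RESIZEABLE flag and that new_size fits within max_length):
 *
 *     Error *err = NULL;
 *     if (qemu_ram_resize(block, new_size, &err) < 0) {
 *         error_report_err(err);
 *     }
 */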
1637
1638/* Called with ram_list.mutex held */
1639static void dirty_memory_extend(ram_addr_t old_ram_size,
1640                                ram_addr_t new_ram_size)
1641{
1642    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1643                                             DIRTY_MEMORY_BLOCK_SIZE);
1644    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1645                                             DIRTY_MEMORY_BLOCK_SIZE);
1646    int i;
1647
1648    /* Only need to extend if block count increased */
1649    if (new_num_blocks <= old_num_blocks) {
1650        return;
1651    }
1652
1653    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1654        DirtyMemoryBlocks *old_blocks;
1655        DirtyMemoryBlocks *new_blocks;
1656        int j;
1657
1658        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1659        new_blocks = g_malloc(sizeof(*new_blocks) +
1660                              sizeof(new_blocks->blocks[0]) * new_num_blocks);
1661
1662        if (old_num_blocks) {
1663            memcpy(new_blocks->blocks, old_blocks->blocks,
1664                   old_num_blocks * sizeof(old_blocks->blocks[0]));
1665        }
1666
1667        for (j = old_num_blocks; j < new_num_blocks; j++) {
1668            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1669        }
1670
1671        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1672
1673        if (old_blocks) {
1674            g_free_rcu(old_blocks, rcu);
1675        }
1676    }
1677}
1678
1679static void ram_block_add(RAMBlock *new_block, Error **errp)
1680{
1681    RAMBlock *block;
1682    RAMBlock *last_block = NULL;
1683    ram_addr_t old_ram_size, new_ram_size;
1684    Error *err = NULL;
1685
1686    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1687
1688    qemu_mutex_lock_ramlist();
1689    new_block->offset = find_ram_offset(new_block->max_length);
1690
1691    if (!new_block->host) {
1692        if (xen_enabled()) {
1693            xen_ram_alloc(new_block->offset, new_block->max_length,
1694                          new_block->mr, &err);
1695            if (err) {
1696                error_propagate(errp, err);
1697                qemu_mutex_unlock_ramlist();
1698                return;
1699            }
1700        } else {
1701            new_block->host = phys_mem_alloc(new_block->max_length,
1702                                             &new_block->mr->align);
1703            if (!new_block->host) {
1704                error_setg_errno(errp, errno,
1705                                 "cannot set up guest memory '%s'",
1706                                 memory_region_name(new_block->mr));
1707                qemu_mutex_unlock_ramlist();
1708                return;
1709            }
1710            memory_try_enable_merging(new_block->host, new_block->max_length);
1711        }
1712    }
1713
1714    new_ram_size = MAX(old_ram_size,
1715              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1716    if (new_ram_size > old_ram_size) {
1717        migration_bitmap_extend(old_ram_size, new_ram_size);
1718        dirty_memory_extend(old_ram_size, new_ram_size);
1719    }
1720    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1721     * QLIST (which has an RCU-friendly variant) does not have insertion at
1722     * tail, so save the last element in last_block.
1723     */
1724    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1725        last_block = block;
1726        if (block->max_length < new_block->max_length) {
1727            break;
1728        }
1729    }
1730    if (block) {
1731        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1732    } else if (last_block) {
1733        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1734    } else { /* list is empty */
1735        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1736    }
1737    ram_list.mru_block = NULL;
1738
1739    /* Write list before version */
1740    smp_wmb();
1741    ram_list.version++;
1742    qemu_mutex_unlock_ramlist();
1743
1744    cpu_physical_memory_set_dirty_range(new_block->offset,
1745                                        new_block->used_length,
1746                                        DIRTY_CLIENTS_ALL);
1747
1748    if (new_block->host) {
1749        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1750        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1751        /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1752        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1753    }
1754}
1755
1756RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1757                                   bool share, const char *mem_path,
1758                                   Error **errp)
1759{
1760    RAMBlock *new_block;
1761    Error *local_err = NULL;
1762
1763    if (xen_enabled()) {
1764        error_setg(errp, "-mem-path not supported with Xen");
1765        return NULL;
1766    }
1767
1768    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1769        /*
1770         * file_ram_alloc() needs to allocate just like
1771         * phys_mem_alloc, but we haven't bothered to provide
1772         * a hook there.
1773         */
1774        error_setg(errp,
1775                   "-mem-path not supported with this accelerator");
1776        return NULL;
1777    }
1778
1779    size = HOST_PAGE_ALIGN(size);
1780    new_block = g_malloc0(sizeof(*new_block));
1781    new_block->mr = mr;
1782    new_block->used_length = size;
1783    new_block->max_length = size;
1784    new_block->flags = share ? RAM_SHARED : 0;
1785    new_block->flags |= RAM_FILE;
1786    new_block->host = file_ram_alloc(new_block, size,
1787                                     mem_path, errp);
1788    if (!new_block->host) {
1789        g_free(new_block);
1790        return NULL;
1791    }
1792
1793    ram_block_add(new_block, &local_err);
1794    if (local_err) {
1795        g_free(new_block);
1796        error_propagate(errp, local_err);
1797        return NULL;
1798    }
1799    return new_block;
1800}
1801
1802static
1803RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1804                                  void (*resized)(const char*,
1805                                                  uint64_t length,
1806                                                  void *host),
1807                                  void *host, bool resizeable,
1808                                  MemoryRegion *mr, Error **errp)
1809{
1810    RAMBlock *new_block;
1811    Error *local_err = NULL;
1812
1813    size = HOST_PAGE_ALIGN(size);
1814    max_size = HOST_PAGE_ALIGN(max_size);
1815    new_block = g_malloc0(sizeof(*new_block));
1816    new_block->mr = mr;
1817    new_block->resized = resized;
1818    new_block->used_length = size;
1819    new_block->max_length = max_size;
1820    assert(max_size >= size);
1821    new_block->fd = -1;
1822    new_block->page_size = getpagesize();
1823    new_block->host = host;
1824    if (host) {
1825        new_block->flags |= RAM_PREALLOC;
1826    }
1827    if (resizeable) {
1828        new_block->flags |= RAM_RESIZEABLE;
1829    }
1830    ram_block_add(new_block, &local_err);
1831    if (local_err) {
1832        g_free(new_block);
1833        error_propagate(errp, local_err);
1834        return NULL;
1835    }
1836    return new_block;
1837}
1838
1839RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1840                                   MemoryRegion *mr, Error **errp)
1841{
1842    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1843}
1844
1845RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1846{
1847    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1848}
1849
1850RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1851                                     void (*resized)(const char*,
1852                                                     uint64_t length,
1853                                                     void *host),
1854                                     MemoryRegion *mr, Error **errp)
1855{
1856    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1857}
1858
1859static void reclaim_ramblock(RAMBlock *block)
1860{
1861    if (block->flags & RAM_PREALLOC) {
1862        ;
1863    } else if (xen_enabled()) {
1864        xen_invalidate_map_cache_entry(block->host);
1865    } else if (block->fd >= 0) {
1866#ifdef _WIN32
1867        if (block->host) {
1868            UnmapViewOfFile(block->host);
1869        }
1870#else
1871        qemu_ram_munmap(block->host, block->max_length);
1872#endif
1873        close(block->fd);
1874    } else {
1875        qemu_anon_ram_free(block->host, block->max_length);
1876    }
1877    g_free(block);
1878}
1879
1880void qemu_ram_free(RAMBlock *block)
1881{
1882    if (!block) {
1883        return;
1884    }
1885
1886    qemu_mutex_lock_ramlist();
1887    QLIST_REMOVE_RCU(block, next);
1888    ram_list.mru_block = NULL;
1889    /* Write list before version */
1890    smp_wmb();
1891    ram_list.version++;
1892    call_rcu(block, reclaim_ramblock, rcu);
1893    qemu_mutex_unlock_ramlist();
1894}
1895
1896#ifndef _WIN32
1897void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1898{
1899    RAMBlock *block;
1900    ram_addr_t offset;
1901    int flags;
1902    void *area, *vaddr;
1903
1904    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1905        offset = addr - block->offset;
1906        if (offset < block->max_length) {
1907            vaddr = ramblock_ptr(block, offset);
1908            if (block->flags & RAM_PREALLOC) {
1909                ;
1910            } else if (xen_enabled()) {
1911                abort();
1912            } else {
1913                flags = MAP_FIXED;
1914                if (block->fd >= 0) {
1915                    flags |= (block->flags & RAM_SHARED ?
1916                              MAP_SHARED : MAP_PRIVATE);
1917                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1918                                flags, block->fd, offset);
1919                } else {
1920                    /*
1921                     * Remap needs to match alloc.  Accelerators that
1922                     * set phys_mem_alloc never remap.  If they did,
1923                     * we'd need a remap hook here.
1924                     */
1925                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1926
1927                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1928                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1929                                flags, -1, 0);
1930                }
1931                if (area != vaddr) {
1932                    fprintf(stderr, "Could not remap addr: "
1933                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1934                            length, addr);
1935                    exit(1);
1936                }
1937                memory_try_enable_merging(vaddr, length);
1938                qemu_ram_setup_dump(vaddr, length);
1939            }
1940        }
1941    }
1942}
1943#endif /* !_WIN32 */
1944
1945/* Return a host pointer to ram allocated with qemu_ram_alloc.
1946 * This should not be used for general purpose DMA.  Use address_space_map
1947 * or address_space_rw instead. For local memory (e.g. video ram) that the
1948 * device owns, use memory_region_get_ram_ptr.
1949 *
1950 * Called within RCU critical section.
1951 */
1952void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1953{
1954    RAMBlock *block = ram_block;
1955
1956    if (block == NULL) {
1957        block = qemu_get_ram_block(addr);
1958        addr -= block->offset;
1959    }
1960
1961    if (xen_enabled() && block->host == NULL) {
1962        /* The block backs guest RAM, but we do not want to map the
1963         * whole of it into QEMU.  For the block at offset 0 just map
1964         * until the end of the requested page.
1965         */
1966        if (block->offset == 0) {
1967            return xen_map_cache(addr, 0, 0);
1968        }
1969
1970        block->host = xen_map_cache(block->offset, block->max_length, 1);
1971    }
1972    return ramblock_ptr(block, addr);
1973}
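
/*
 * Illustrative sketch, not part of the build: per the comment above, device
 * code that owns its RAM (e.g. a video framebuffer) should go through
 * memory_region_get_ram_ptr() rather than qemu_map_ram_ptr().  The "vram"
 * region, its size and the example_* name are hypothetical.
 */
static void example_clear_vram(MemoryRegion *vram, uint64_t vram_size)
{
    uint8_t *host = memory_region_get_ram_ptr(vram);

    memset(host, 0, vram_size);
}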
1974
1975/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1976 * but takes a size argument.
1977 *
1978 * Called within RCU critical section.
1979 */
1980static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1981                                 hwaddr *size)
1982{
1983    RAMBlock *block = ram_block;
1984    if (*size == 0) {
1985        return NULL;
1986    }
1987
1988    if (block == NULL) {
1989        block = qemu_get_ram_block(addr);
1990        addr -= block->offset;
1991    }
1992    *size = MIN(*size, block->max_length - addr);
1993
1994    if (xen_enabled() && block->host == NULL) {
1995        /* The block backs guest RAM, but we do not want to map the
1996         * whole of it into QEMU.  For the block at offset 0 just map
1997         * the requested area.
1998         */
1999        if (block->offset == 0) {
2000            return xen_map_cache(addr, *size, 1);
2001        }
2002
2003        block->host = xen_map_cache(block->offset, block->max_length, 1);
2004    }
2005
2006    return ramblock_ptr(block, addr);
2007}
2008
2009/*
2010 * Translates a host ptr back to a RAMBlock and an offset within that
2011 * RAMBlock.
2012 *
2013 * ptr: Host pointer to look up
2014 * round_offset: If true round the result offset down to a page boundary
2016 * *offset: set to result offset within the RAMBlock
2017 *
2018 * Returns: RAMBlock (or NULL if not found)
2019 *
2020 * By the time this function returns, the returned pointer is not protected
2021 * by RCU anymore.  If the caller is not within an RCU critical section and
2022 * does not hold the iothread lock, it must have other means of protecting the
2023 * pointer, such as a reference to the region that includes the incoming
2024 * ram_addr_t.
2025 */
2026RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
2027                                   ram_addr_t *offset)
2028{
2029    RAMBlock *block;
2030    uint8_t *host = ptr;
2031
2032    if (xen_enabled()) {
2033        ram_addr_t ram_addr;
2034        rcu_read_lock();
2035        ram_addr = xen_ram_addr_from_mapcache(ptr);
2036        block = qemu_get_ram_block(ram_addr);
2037        if (block) {
2038            *offset = ram_addr - block->offset;
2039        }
2040        rcu_read_unlock();
2041        return block;
2042    }
2043
2044    rcu_read_lock();
2045    block = atomic_rcu_read(&ram_list.mru_block);
2046    if (block && block->host && host - block->host < block->max_length) {
2047        goto found;
2048    }
2049
2050    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2051        /* This case happens when the block is not mapped. */
2052        if (block->host == NULL) {
2053            continue;
2054        }
2055        if (host - block->host < block->max_length) {
2056            goto found;
2057        }
2058    }
2059
2060    rcu_read_unlock();
2061    return NULL;
2062
2063found:
2064    *offset = (host - block->host);
2065    if (round_offset) {
2066        *offset &= TARGET_PAGE_MASK;
2067    }
2068    rcu_read_unlock();
2069    return block;
2070}
2071
2072/*
2073 * Finds the named RAMBlock
2074 *
2075 * name: The name of RAMBlock to find
2076 *
2077 * Returns: RAMBlock (or NULL if not found)
2078 */
2079RAMBlock *qemu_ram_block_by_name(const char *name)
2080{
2081    RAMBlock *block;
2082
2083    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2084        if (!strcmp(name, block->idstr)) {
2085            return block;
2086        }
2087    }
2088
2089    return NULL;
2090}
2091
2092/* Some of the softmmu routines need to translate from a host pointer
2093   (typically a TLB entry) back to a ram offset.  */
2094ram_addr_t qemu_ram_addr_from_host(void *ptr)
2095{
2096    RAMBlock *block;
2097    ram_addr_t offset;
2098
2099    block = qemu_ram_block_from_host(ptr, false, &offset);
2100    if (!block) {
2101        return RAM_ADDR_INVALID;
2102    }
2103
2104    return block->offset + offset;
2105}
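
/*
 * Illustrative sketch, not used by the build: translating a host pointer
 * that came out of a RAM block back to its ram_addr_t via the helper above.
 * The example_* name is hypothetical; the RAM_ADDR_INVALID check is the
 * only contract being demonstrated.
 */
static bool example_host_ptr_to_ram_addr(void *host_ptr, ram_addr_t *out)
{
    ram_addr_t ram_addr = qemu_ram_addr_from_host(host_ptr);

    if (ram_addr == RAM_ADDR_INVALID) {
        /* The pointer does not belong to any registered RAM block. */
        return false;
    }
    *out = ram_addr;
    return true;
}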
2106
2107/* Called within RCU critical section.  */
2108static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2109                               uint64_t val, unsigned size)
2110{
2111    bool locked = false;
2112
2113    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2114        locked = true;
2115        tb_lock();
2116        tb_invalidate_phys_page_fast(ram_addr, size);
2117    }
2118    switch (size) {
2119    case 1:
2120        stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2121        break;
2122    case 2:
2123        stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2124        break;
2125    case 4:
2126        stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2127        break;
2128    default:
2129        abort();
2130    }
2131
2132    if (locked) {
2133        tb_unlock();
2134    }
2135
2136    /* Set both VGA and migration bits for simplicity and to remove
2137     * the notdirty callback faster.
2138     */
2139    cpu_physical_memory_set_dirty_range(ram_addr, size,
2140                                        DIRTY_CLIENTS_NOCODE);
2141    /* we remove the notdirty callback only if the code has been
2142       flushed */
2143    if (!cpu_physical_memory_is_clean(ram_addr)) {
2144        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2145    }
2146}
2147
2148static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2149                                 unsigned size, bool is_write)
2150{
2151    return is_write;
2152}
2153
2154static const MemoryRegionOps notdirty_mem_ops = {
2155    .write = notdirty_mem_write,
2156    .valid.accepts = notdirty_mem_accepts,
2157    .endianness = DEVICE_NATIVE_ENDIAN,
2158};
2159
2160/* Generate a debug exception if a watchpoint has been hit.  */
2161static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2162{
2163    CPUState *cpu = current_cpu;
2164    CPUClass *cc = CPU_GET_CLASS(cpu);
2165    CPUArchState *env = cpu->env_ptr;
2166    target_ulong pc, cs_base;
2167    target_ulong vaddr;
2168    CPUWatchpoint *wp;
2169    uint32_t cpu_flags;
2170
2171    if (cpu->watchpoint_hit) {
2172        /* We re-entered the check after replacing the TB. Now raise
2173         * the debug interrupt so that it will trigger after the
2174         * current instruction. */
2175        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2176        return;
2177    }
2178    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2179    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2180        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2181            && (wp->flags & flags)) {
2182            if (flags == BP_MEM_READ) {
2183                wp->flags |= BP_WATCHPOINT_HIT_READ;
2184            } else {
2185                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2186            }
2187            wp->hitaddr = vaddr;
2188            wp->hitattrs = attrs;
2189            if (!cpu->watchpoint_hit) {
2190                if (wp->flags & BP_CPU &&
2191                    !cc->debug_check_watchpoint(cpu, wp)) {
2192                    wp->flags &= ~BP_WATCHPOINT_HIT;
2193                    continue;
2194                }
2195                cpu->watchpoint_hit = wp;
2196
2197                /* The tb_lock will be reset when cpu_loop_exit or
2198                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2199                 * main loop.
2200                 */
2201                tb_lock();
2202                tb_check_watchpoint(cpu);
2203                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2204                    cpu->exception_index = EXCP_DEBUG;
2205                    cpu_loop_exit(cpu);
2206                } else {
2207                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2208                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2209                    cpu_loop_exit_noexc(cpu);
2210                }
2211            }
2212        } else {
2213            wp->flags &= ~BP_WATCHPOINT_HIT;
2214        }
2215    }
2216}
2217
2218/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2219   so these check for a hit then pass through to the normal out-of-line
2220   phys routines.  */
2221static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2222                                  unsigned size, MemTxAttrs attrs)
2223{
2224    MemTxResult res;
2225    uint64_t data;
2226    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2227    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2228
2229    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2230    switch (size) {
2231    case 1:
2232        data = address_space_ldub(as, addr, attrs, &res);
2233        break;
2234    case 2:
2235        data = address_space_lduw(as, addr, attrs, &res);
2236        break;
2237    case 4:
2238        data = address_space_ldl(as, addr, attrs, &res);
2239        break;
2240    default: abort();
2241    }
2242    *pdata = data;
2243    return res;
2244}
2245
2246static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2247                                   uint64_t val, unsigned size,
2248                                   MemTxAttrs attrs)
2249{
2250    MemTxResult res;
2251    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2252    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2253
2254    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2255    switch (size) {
2256    case 1:
2257        address_space_stb(as, addr, val, attrs, &res);
2258        break;
2259    case 2:
2260        address_space_stw(as, addr, val, attrs, &res);
2261        break;
2262    case 4:
2263        address_space_stl(as, addr, val, attrs, &res);
2264        break;
2265    default: abort();
2266    }
2267    return res;
2268}
2269
2270static const MemoryRegionOps watch_mem_ops = {
2271    .read_with_attrs = watch_mem_read,
2272    .write_with_attrs = watch_mem_write,
2273    .endianness = DEVICE_NATIVE_ENDIAN,
2274};
2275
2276static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2277                                unsigned len, MemTxAttrs attrs)
2278{
2279    subpage_t *subpage = opaque;
2280    uint8_t buf[8];
2281    MemTxResult res;
2282
2283#if defined(DEBUG_SUBPAGE)
2284    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2285           subpage, len, addr);
2286#endif
2287    res = address_space_read(subpage->as, addr + subpage->base,
2288                             attrs, buf, len);
2289    if (res) {
2290        return res;
2291    }
2292    switch (len) {
2293    case 1:
2294        *data = ldub_p(buf);
2295        return MEMTX_OK;
2296    case 2:
2297        *data = lduw_p(buf);
2298        return MEMTX_OK;
2299    case 4:
2300        *data = ldl_p(buf);
2301        return MEMTX_OK;
2302    case 8:
2303        *data = ldq_p(buf);
2304        return MEMTX_OK;
2305    default:
2306        abort();
2307    }
2308}
2309
2310static MemTxResult subpage_write(void *opaque, hwaddr addr,
2311                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2312{
2313    subpage_t *subpage = opaque;
2314    uint8_t buf[8];
2315
2316#if defined(DEBUG_SUBPAGE)
2317    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2318           " value %"PRIx64"\n",
2319           __func__, subpage, len, addr, value);
2320#endif
2321    switch (len) {
2322    case 1:
2323        stb_p(buf, value);
2324        break;
2325    case 2:
2326        stw_p(buf, value);
2327        break;
2328    case 4:
2329        stl_p(buf, value);
2330        break;
2331    case 8:
2332        stq_p(buf, value);
2333        break;
2334    default:
2335        abort();
2336    }
2337    return address_space_write(subpage->as, addr + subpage->base,
2338                               attrs, buf, len);
2339}
2340
2341static bool subpage_accepts(void *opaque, hwaddr addr,
2342                            unsigned len, bool is_write)
2343{
2344    subpage_t *subpage = opaque;
2345#if defined(DEBUG_SUBPAGE)
2346    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2347           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2348#endif
2349
2350    return address_space_access_valid(subpage->as, addr + subpage->base,
2351                                      len, is_write);
2352}
2353
2354static const MemoryRegionOps subpage_ops = {
2355    .read_with_attrs = subpage_read,
2356    .write_with_attrs = subpage_write,
2357    .impl.min_access_size = 1,
2358    .impl.max_access_size = 8,
2359    .valid.min_access_size = 1,
2360    .valid.max_access_size = 8,
2361    .valid.accepts = subpage_accepts,
2362    .endianness = DEVICE_NATIVE_ENDIAN,
2363};
2364
2365static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2366                            uint16_t section)
2367{
2368    int idx, eidx;
2369
2370    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2371        return -1;
2372    idx = SUBPAGE_IDX(start);
2373    eidx = SUBPAGE_IDX(end);
2374#if defined(DEBUG_SUBPAGE)
2375    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2376           __func__, mmio, start, end, idx, eidx, section);
2377#endif
2378    for (; idx <= eidx; idx++) {
2379        mmio->sub_section[idx] = section;
2380    }
2381
2382    return 0;
2383}
2384
2385static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2386{
2387    subpage_t *mmio;
2388
2389    mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2390    mmio->as = as;
2391    mmio->base = base;
2392    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2393                          NULL, TARGET_PAGE_SIZE);
2394    mmio->iomem.subpage = true;
2395#if defined(DEBUG_SUBPAGE)
2396    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2397           mmio, base, TARGET_PAGE_SIZE);
2398#endif
2399    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2400
2401    return mmio;
2402}
2403
2404static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2405                              MemoryRegion *mr)
2406{
2407    assert(as);
2408    MemoryRegionSection section = {
2409        .address_space = as,
2410        .mr = mr,
2411        .offset_within_address_space = 0,
2412        .offset_within_region = 0,
2413        .size = int128_2_64(),
2414    };
2415
2416    return phys_section_add(map, &section);
2417}
2418
2419MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2420{
2421    int asidx = cpu_asidx_from_attrs(cpu, attrs);
2422    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2423    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2424    MemoryRegionSection *sections = d->map.sections;
2425
2426    return sections[index & ~TARGET_PAGE_MASK].mr;
2427}
2428
2429static void io_mem_init(void)
2430{
2431    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2432    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2433                          NULL, UINT64_MAX);
2434    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2435                          NULL, UINT64_MAX);
2436    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2437                          NULL, UINT64_MAX);
2438}
2439
2440static void mem_begin(MemoryListener *listener)
2441{
2442    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2443    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2444    uint16_t n;
2445
2446    n = dummy_section(&d->map, as, &io_mem_unassigned);
2447    assert(n == PHYS_SECTION_UNASSIGNED);
2448    n = dummy_section(&d->map, as, &io_mem_notdirty);
2449    assert(n == PHYS_SECTION_NOTDIRTY);
2450    n = dummy_section(&d->map, as, &io_mem_rom);
2451    assert(n == PHYS_SECTION_ROM);
2452    n = dummy_section(&d->map, as, &io_mem_watch);
2453    assert(n == PHYS_SECTION_WATCH);
2454
2455    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2456    d->as = as;
2457    as->next_dispatch = d;
2458}
2459
2460static void address_space_dispatch_free(AddressSpaceDispatch *d)
2461{
2462    phys_sections_free(&d->map);
2463    g_free(d);
2464}
2465
2466static void mem_commit(MemoryListener *listener)
2467{
2468    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2469    AddressSpaceDispatch *cur = as->dispatch;
2470    AddressSpaceDispatch *next = as->next_dispatch;
2471
2472    phys_page_compact_all(next, next->map.nodes_nb);
2473
2474    atomic_rcu_set(&as->dispatch, next);
2475    if (cur) {
2476        call_rcu(cur, address_space_dispatch_free, rcu);
2477    }
2478}
2479
2480static void tcg_commit(MemoryListener *listener)
2481{
2482    CPUAddressSpace *cpuas;
2483    AddressSpaceDispatch *d;
2484
2485    /* since each CPU stores ram addresses in its TLB cache, we must
2486       reset the modified entries */
2487    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2488    cpu_reloading_memory_map();
2489    /* The CPU and TLB are protected by the iothread lock.
2490     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2491     * may have split the RCU critical section.
2492     */
2493    d = atomic_rcu_read(&cpuas->as->dispatch);
2494    atomic_rcu_set(&cpuas->memory_dispatch, d);
2495    tlb_flush(cpuas->cpu, 1);
2496}
2497
2498void address_space_init_dispatch(AddressSpace *as)
2499{
2500    as->dispatch = NULL;
2501    as->dispatch_listener = (MemoryListener) {
2502        .begin = mem_begin,
2503        .commit = mem_commit,
2504        .region_add = mem_add,
2505        .region_nop = mem_add,
2506        .priority = 0,
2507    };
2508    memory_listener_register(&as->dispatch_listener, as);
2509}
2510
2511void address_space_unregister(AddressSpace *as)
2512{
2513    memory_listener_unregister(&as->dispatch_listener);
2514}
2515
2516void address_space_destroy_dispatch(AddressSpace *as)
2517{
2518    AddressSpaceDispatch *d = as->dispatch;
2519
2520    atomic_rcu_set(&as->dispatch, NULL);
2521    if (d) {
2522        call_rcu(d, address_space_dispatch_free, rcu);
2523    }
2524}
2525
2526static void memory_map_init(void)
2527{
2528    system_memory = g_malloc(sizeof(*system_memory));
2529
2530    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2531    address_space_init(&address_space_memory, system_memory, "memory");
2532
2533    system_io = g_malloc(sizeof(*system_io));
2534    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2535                          65536);
2536    address_space_init(&address_space_io, system_io, "I/O");
2537}
2538
2539MemoryRegion *get_system_memory(void)
2540{
2541    return system_memory;
2542}
2543
2544MemoryRegion *get_system_io(void)
2545{
2546    return system_io;
2547}
2548
2549#endif /* !defined(CONFIG_USER_ONLY) */
2550
2551/* physical memory access (slow version, mainly for debug) */
2552#if defined(CONFIG_USER_ONLY)
2553int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2554                        uint8_t *buf, int len, int is_write)
2555{
2556    int l, flags;
2557    target_ulong page;
2558    void * p;
2559
2560    while (len > 0) {
2561        page = addr & TARGET_PAGE_MASK;
2562        l = (page + TARGET_PAGE_SIZE) - addr;
2563        if (l > len)
2564            l = len;
2565        flags = page_get_flags(page);
2566        if (!(flags & PAGE_VALID))
2567            return -1;
2568        if (is_write) {
2569            if (!(flags & PAGE_WRITE))
2570                return -1;
2571            /* XXX: this code should not depend on lock_user */
2572            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2573                return -1;
2574            memcpy(p, buf, l);
2575            unlock_user(p, addr, l);
2576        } else {
2577            if (!(flags & PAGE_READ))
2578                return -1;
2579            /* XXX: this code should not depend on lock_user */
2580            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2581                return -1;
2582            memcpy(buf, p, l);
2583            unlock_user(p, addr, 0);
2584        }
2585        len -= l;
2586        buf += l;
2587        addr += l;
2588    }
2589    return 0;
2590}
2591
2592void cpu_set_mr(Object *obj, Visitor *v, void *opaque,
2593                const char *name, Error **errp)
2594{
2595}
2596
2597#else
2598
2599void cpu_set_mr(Object *obj, Visitor *v, void *opaque,
2600                const char *name, Error **errp)
2601{
2602    CPUState *cpu = CPU(obj);
2603    Error *local_err = NULL;
2604    char *path = NULL;
2605
2606    visit_type_str(v, name, &path, &local_err);
2607
2608    if (!local_err && strcmp(path, "") != 0) {
2609        cpu->memory = MEMORY_REGION(object_resolve_link(obj, name, path,
2610                                &local_err));
2611    }
2612
2613    if (local_err) {
2614        error_propagate(errp, local_err);
2615        return;
2616    }
2617
2618    object_ref(OBJECT(cpu->memory));
2619    cpu->as = address_space_init_shareable(cpu->memory, NULL);
2620}
2621
2622static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2623                                     hwaddr length)
2624{
2625    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2626    addr += memory_region_get_ram_addr(mr);
2627
2628    /* No early return if dirty_log_mask is or becomes 0, because
2629     * cpu_physical_memory_set_dirty_range will still call
2630     * xen_modified_memory.
2631     */
2632    if (dirty_log_mask) {
2633        dirty_log_mask =
2634            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2635    }
2636    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2637        tb_lock();
2638        tb_invalidate_phys_range(addr, addr + length);
2639        tb_unlock();
2640        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2641    }
2642    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2643}
2644
2645static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2646{
2647    unsigned access_size_max = mr->ops->valid.max_access_size;
2648
2649    /* Regions are assumed to support 1-4 byte accesses unless
2650       otherwise specified.  */
2651    if (access_size_max == 0) {
2652        access_size_max = 4;
2653    }
2654
2655    /* Bound the maximum access by the alignment of the address.  */
2656    if (!mr->ops->impl.unaligned) {
2657        unsigned align_size_max = addr & -addr;
2658        if (align_size_max != 0 && align_size_max < access_size_max) {
2659            access_size_max = align_size_max;
2660        }
2661    }
2662
2663    /* Don't attempt accesses larger than the maximum.  */
2664    if (l > access_size_max) {
2665        l = access_size_max;
2666    }
2667    l = pow2floor(l);
2668
2669    return l;
2670}
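
/*
 * Worked example for the clamping above (hypothetical values): for a region
 * with valid.max_access_size == 4, a request of l == 8 bytes at
 * addr == 0x1002 is first bounded by the address alignment
 * (0x1002 & -0x1002 == 2), so access_size_max becomes 2, and
 * pow2floor(MIN(8, 2)) == 2; the caller then issues a 2-byte access and
 * loops for the remainder.
 */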
2671
2672static bool prepare_mmio_access(MemoryRegion *mr)
2673{
2674    bool unlocked = !qemu_mutex_iothread_locked();
2675    bool release_lock = false;
2676
2677    if (unlocked && mr->global_locking) {
2678        qemu_mutex_lock_iothread();
2679        unlocked = false;
2680        release_lock = true;
2681    }
2682    if (mr->flush_coalesced_mmio) {
2683        if (unlocked) {
2684            qemu_mutex_lock_iothread();
2685        }
2686        qemu_flush_coalesced_mmio_buffer();
2687        if (unlocked) {
2688            qemu_mutex_unlock_iothread();
2689        }
2690    }
2691
2692    return release_lock;
2693}
2694
2695/* Called within RCU critical section.  */
2696static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2697                                                MemTxAttrs attrs,
2698                                                const uint8_t *buf,
2699                                                int len, hwaddr addr1,
2700                                                hwaddr l, MemoryRegion *mr)
2701{
2702    uint8_t *ptr;
2703    uint64_t val;
2704    MemTxResult result = MEMTX_OK;
2705    bool release_lock = false;
2706
2707    for (;;) {
2708        if (!memory_access_is_direct(mr, true)) {
2709            release_lock |= prepare_mmio_access(mr);
2710            l = memory_access_size(mr, l, addr1);
2711            /* XXX: could force current_cpu to NULL to avoid
2712               potential bugs */
2713            switch (l) {
2714            case 8:
2715                /* 64 bit write access */
2716                val = ldq_p(buf);
2717                result |= memory_region_dispatch_write(mr, addr1, val, 8,
2718                                                       attrs);
2719                break;
2720            case 4:
2721                /* 32 bit write access */
2722                val = ldl_p(buf);
2723                result |= memory_region_dispatch_write(mr, addr1, val, 4,
2724                                                       attrs);
2725                break;
2726            case 2:
2727                /* 16 bit write access */
2728                val = lduw_p(buf);
2729                result |= memory_region_dispatch_write(mr, addr1, val, 2,
2730                                                       attrs);
2731                break;
2732            case 1:
2733                /* 8 bit write access */
2734                val = ldub_p(buf);
2735                result |= memory_region_dispatch_write(mr, addr1, val, 1,
2736                                                       attrs);
2737                break;
2738            default:
2739                if (mr->ops->access) {
2740                    MemoryTransaction tr = {
2741                        .data.p8 = (uint8_t *) buf,
2742                        .rw = true,
2743                        .addr = addr1,
2744                        .size = l,
2745                        .attr = attrs,
2746                        .opaque = mr->opaque,
2747                    };
2748                    mr->ops->access(&tr);
2749                } else {
2750                    abort();
2751                }
2752            }
2753        } else {
2754            /* RAM case */
2755            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2756            memcpy(ptr, buf, l);
2757            invalidate_and_set_dirty(mr, addr1, l);
2758        }
2759
2760        if (release_lock) {
2761            qemu_mutex_unlock_iothread();
2762            release_lock = false;
2763        }
2764
2765        len -= l;
2766        buf += l;
2767        addr += l;
2768
2769        if (!len) {
2770            break;
2771        }
2772
2773        l = len;
2774        mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
2775    }
2776
2777    return result;
2778}
2779
2780MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2781                                const uint8_t *buf, int len)
2782{
2783    hwaddr l;
2784    hwaddr addr1;
2785    MemoryRegion *mr;
2786    MemTxResult result = MEMTX_OK;
2787
2788    if (len > 0) {
2789        rcu_read_lock();
2790        l = len;
2791        mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
2792        result = address_space_write_continue(as, addr, attrs, buf, len,
2793                                              addr1, l, mr);
2794        rcu_read_unlock();
2795    }
2796
2797    return result;
2798}
2799
2800/* Called within RCU critical section.  */
2801MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2802                                        MemTxAttrs attrs, uint8_t *buf,
2803                                        int len, hwaddr addr1, hwaddr l,
2804                                        MemoryRegion *mr)
2805{
2806    uint8_t *ptr;
2807    uint64_t val;
2808    MemTxResult result = MEMTX_OK;
2809    bool release_lock = false;
2810
2811    for (;;) {
2812        if (!memory_access_is_direct(mr, false)) {
2813            /* I/O case */
2814            release_lock |= prepare_mmio_access(mr);
2815            l = memory_access_size(mr, l, addr1);
2816            switch (l) {
2817            case 8:
2818                /* 64 bit read access */
2819                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2820                                                      attrs);
2821                stq_p(buf, val);
2822                break;
2823            case 4:
2824                /* 32 bit read access */
2825                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2826                                                      attrs);
2827                stl_p(buf, val);
2828                break;
2829            case 2:
2830                /* 16 bit read access */
2831                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2832                                                      attrs);
2833                stw_p(buf, val);
2834                break;
2835            case 1:
2836                /* 8 bit read access */
2837                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2838                                                      attrs);
2839                stb_p(buf, val);
2840                break;
2841            default:
2842                if (mr->ops->access) {
2843                    MemoryTransaction tr = {
2844                        .data.p8 = buf,
2845                        .rw = false,
2846                        .addr = addr1,
2847                        .size = l,
2848                        .attr = attrs,
2849                        .opaque = mr->opaque,
2850                    };
2851                    mr->ops->access(&tr);
2852                } else {
2853                    abort();
2854                }
2855            }
2856        } else {
2857            /* RAM case */
2858            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2859            memcpy(buf, ptr, l);
2860        }
2861
2862        if (release_lock) {
2863            qemu_mutex_unlock_iothread();
2864            release_lock = false;
2865        }
2866
2867        len -= l;
2868        buf += l;
2869        addr += l;
2870
2871        if (!len) {
2872            break;
2873        }
2874
2875        l = len;
2876        mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
2877    }
2878
2879    return result;
2880}
2881
2882MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2883                                    MemTxAttrs attrs, uint8_t *buf, int len)
2884{
2885    hwaddr l;
2886    hwaddr addr1;
2887    MemoryRegion *mr;
2888    MemTxResult result = MEMTX_OK;
2889
2890    if (len > 0) {
2891        rcu_read_lock();
2892        l = len;
2893        mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
2894        result = address_space_read_continue(as, addr, attrs, buf, len,
2895                                             addr1, l, mr);
2896        rcu_read_unlock();
2897    }
2898
2899    return result;
2900}
2901
2902MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2903                             uint8_t *buf, int len, bool is_write)
2904{
2905    if (is_write) {
2906        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2907    } else {
2908        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2909    }
2910}
2911
2912void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2913                            int len, int is_write)
2914{
2915    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2916                     buf, len, is_write);
2917}
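
/*
 * Illustrative sketch, not wired into the build: a 4-byte guest-physical
 * read through the slow path above.  The example_* name is hypothetical;
 * callers that care about bus errors should use address_space_rw() and
 * check the MemTxResult rather than the void cpu_physical_memory_rw()
 * wrapper.  The bytes are copied as-is, with no endian conversion.
 */
static bool example_read_guest_word(hwaddr paddr, uint32_t *out)
{
    MemTxResult res;

    res = address_space_rw(&address_space_memory, paddr,
                           MEMTXATTRS_UNSPECIFIED, (uint8_t *)out,
                           sizeof(*out), false);
    return res == MEMTX_OK;
}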
2918
2919enum write_rom_type {
2920    WRITE_DATA,
2921    FLUSH_CACHE,
2922};
2923
2924static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2925    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2926{
2927    hwaddr l;
2928    uint8_t *ptr;
2929    hwaddr addr1;
2930    MemoryRegion *mr;
2931
2932    rcu_read_lock();
2933    while (len > 0) {
2934        l = len;
2935        mr = address_space_translate(as, addr, &addr1, &l, true);
2936
2937        if (!(memory_region_is_ram(mr) ||
2938              memory_region_is_romd(mr))) {
2939            if (type == WRITE_DATA) {
2940                address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
2941                                 (uint8_t *) buf, l, true);
2942            } else {
2943                l = memory_access_size(mr, l, addr1);
2944            }
2945        } else {
2946            /* ROM/RAM case */
2947            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2948            switch (type) {
2949            case WRITE_DATA:
2950                memcpy(ptr, buf, l);
2951                invalidate_and_set_dirty(mr, addr1, l);
2952                break;
2953            case FLUSH_CACHE:
2954                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2955                break;
2956            }
2957        }
2958        len -= l;
2959        buf += l;
2960        addr += l;
2961    }
2962    rcu_read_unlock();
2963}
2964
2965/* used for ROM loading: can write in RAM and ROM */
2966void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2967                                   const uint8_t *buf, int len)
2968{
2969    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2970}
2971
2972void cpu_flush_icache_range(hwaddr start, int len)
2973{
2974    /*
2975     * This function should do the same thing as an icache flush that was
2976     * triggered from within the guest. For TCG we are always cache coherent,
2977     * so there is no need to flush anything. For KVM / Xen we need to flush
2978     * the host's instruction cache at least.
2979     */
2980    if (tcg_enabled()) {
2981        return;
2982    }
2983
2984    cpu_physical_memory_write_rom_internal(&address_space_memory,
2985                                           start, NULL, len, FLUSH_CACHE);
2986}
2987
2988typedef struct {
2989    MemoryRegion *mr;
2990    void *buffer;
2991    hwaddr addr;
2992    hwaddr len;
2993    bool in_use;
2994} BounceBuffer;
2995
2996static BounceBuffer bounce;
2997
2998typedef struct MapClient {
2999    QEMUBH *bh;
3000    QLIST_ENTRY(MapClient) link;
3001} MapClient;
3002
3003QemuMutex map_client_list_lock;
3004static QLIST_HEAD(map_client_list, MapClient) map_client_list
3005    = QLIST_HEAD_INITIALIZER(map_client_list);
3006
3007static void cpu_unregister_map_client_do(MapClient *client)
3008{
3009    QLIST_REMOVE(client, link);
3010    g_free(client);
3011}
3012
3013static void cpu_notify_map_clients_locked(void)
3014{
3015    MapClient *client;
3016
3017    while (!QLIST_EMPTY(&map_client_list)) {
3018        client = QLIST_FIRST(&map_client_list);
3019        qemu_bh_schedule(client->bh);
3020        cpu_unregister_map_client_do(client);
3021    }
3022}
3023
3024void cpu_register_map_client(QEMUBH *bh)
3025{
3026    MapClient *client = g_malloc(sizeof(*client));
3027
3028    qemu_mutex_lock(&map_client_list_lock);
3029    client->bh = bh;
3030    QLIST_INSERT_HEAD(&map_client_list, client, link);
3031    if (!atomic_read(&bounce.in_use)) {
3032        cpu_notify_map_clients_locked();
3033    }
3034    qemu_mutex_unlock(&map_client_list_lock);
3035}
3036
3037void cpu_exec_init_all(void)
3038{
3039    qemu_mutex_init(&ram_list.mutex);
3040    /* The data structures we set up here depend on knowing the page size,
3041     * so no more changes can be made after this point.
3042     * In an ideal world, nothing we did before we had finished the
3043     * machine setup would care about the target page size, and we could
3044     * do this much later, rather than requiring board models to state
3045     * up front what their requirements are.
3046     */
3047    finalize_target_page_bits();
3048    io_mem_init();
3049    memory_map_init();
3050    qemu_mutex_init(&map_client_list_lock);
3051}
3052
3053void cpu_unregister_map_client(QEMUBH *bh)
3054{
3055    MapClient *client;
3056
3057    qemu_mutex_lock(&map_client_list_lock);
3058    QLIST_FOREACH(client, &map_client_list, link) {
3059        if (client->bh == bh) {
3060            cpu_unregister_map_client_do(client);
3061            break;
3062        }
3063    }
3064    qemu_mutex_unlock(&map_client_list_lock);
3065}
3066
3067static void cpu_notify_map_clients(void)
3068{
3069    qemu_mutex_lock(&map_client_list_lock);
3070    cpu_notify_map_clients_locked();
3071    qemu_mutex_unlock(&map_client_list_lock);
3072}
3073
3074bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
3075{
3076    MemoryRegion *mr;
3077    hwaddr l, xlat;
3078
3079    rcu_read_lock();
3080    while (len > 0) {
3081        l = len;
3082        mr = address_space_translate(as, addr, &xlat, &l, is_write);
3083        if (!memory_access_is_direct(mr, is_write)) {
3084            l = memory_access_size(mr, l, addr);
3085            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
3086                rcu_read_unlock();
3087                return false;
3088            }
3089        }
3090
3091        len -= l;
3092        addr += l;
3093    }
3094    rcu_read_unlock();
3095    return true;
3096}
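
/*
 * Illustrative sketch, not part of the build: probing whether a DMA window
 * is fully backed by accessible memory before touching it, using the helper
 * above.  The example_* name and the read+write probe are arbitrary choices
 * for the example.
 */
static bool example_dma_window_ok(AddressSpace *as, hwaddr base, int size)
{
    return address_space_access_valid(as, base, size, false) &&
           address_space_access_valid(as, base, size, true);
}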
3097
3098/* Map a physical memory region into a host virtual address.
3099 * May map a subset of the requested range, given by and returned in *plen.
3100 * May return NULL if resources needed to perform the mapping are exhausted.
3101 * Use only for reads OR writes - not for read-modify-write operations.
3102 * Use cpu_register_map_client() to know when retrying the map operation is
3103 * likely to succeed.
3104 */
3105void *address_space_map(AddressSpace *as,
3106                        hwaddr addr,
3107                        hwaddr *plen,
3108                        bool is_write)
3109{
3110    hwaddr len = *plen;
3111    hwaddr done = 0;
3112    hwaddr l, xlat, base;
3113    MemoryRegion *mr, *this_mr;
3114    void *ptr;
3115
3116    if (len == 0) {
3117        return NULL;
3118    }
3119
3120    l = len;
3121    rcu_read_lock();
3122    mr = address_space_translate(as, addr, &xlat, &l, is_write);
3123
3124    if (!memory_access_is_direct(mr, is_write)) {
3125        if (atomic_xchg(&bounce.in_use, true)) {
3126            rcu_read_unlock();
3127            return NULL;
3128        }
3129        /* Avoid unbounded allocations */
3130        l = MIN(l, TARGET_PAGE_SIZE);
3131        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
3132        bounce.addr = addr;
3133        bounce.len = l;
3134
3135        memory_region_ref(mr);
3136        bounce.mr = mr;
3137        if (!is_write) {
3138            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3139                               bounce.buffer, l);
3140        }
3141
3142        rcu_read_unlock();
3143        *plen = l;
3144        return bounce.buffer;
3145    }
3146
3147    base = xlat;
3148
3149    for (;;) {
3150        len -= l;
3151        addr += l;
3152        done += l;
3153        if (len == 0) {
3154            break;
3155        }
3156
3157        l = len;
3158        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3159        if (this_mr != mr || xlat != base + done) {
3160            break;
3161        }
3162    }
3163
3164    memory_region_ref(mr);
3165    *plen = done;
3166    ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3167    rcu_read_unlock();
3168
3169    return ptr;
3170}
3171
3172/* Unmaps a memory region previously mapped by address_space_map().
3173 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3174 * the amount of memory that was actually read or written by the caller.
3175 */
3176void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3177                         int is_write, hwaddr access_len)
3178{
3179    if (buffer != bounce.buffer) {
3180        MemoryRegion *mr;
3181        ram_addr_t addr1;
3182
3183        mr = memory_region_from_host(buffer, &addr1);
3184        assert(mr != NULL);
3185        if (is_write) {
3186            invalidate_and_set_dirty(mr, addr1, access_len);
3187        }
3188        if (xen_enabled()) {
3189            xen_invalidate_map_cache_entry(buffer);
3190        }
3191        memory_region_unref(mr);
3192        return;
3193    }
3194    if (is_write) {
3195        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3196                            bounce.buffer, access_len);
3197    }
3198    qemu_vfree(bounce.buffer);
3199    bounce.buffer = NULL;
3200    memory_region_unref(bounce.mr);
3201    atomic_mb_set(&bounce.in_use, false);
3202    cpu_notify_map_clients();
3203}
3204
3205void *cpu_physical_memory_map(hwaddr addr,
3206                              hwaddr *plen,
3207                              int is_write)
3208{
3209    return address_space_map(&address_space_memory, addr, plen, is_write);
3210}
3211
3212void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3213                               int is_write, hwaddr access_len)
3214{
3215    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3216}
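
/*
 * Illustrative sketch, not wired into the build: the intended
 * map/modify/unmap pattern for address_space_map()/address_space_unmap()
 * above.  The example_* name and the memset pattern are hypothetical.  A
 * caller that can wait would register a bottom half with
 * cpu_register_map_client() and retry when the map returns NULL because
 * the single bounce buffer is in use.
 */
static bool example_fill_guest_buffer(AddressSpace *as, hwaddr addr,
                                      hwaddr len, uint8_t pattern)
{
    hwaddr plen = len;
    void *host = address_space_map(as, addr, &plen, true);

    if (!host) {
        return false;   /* resources exhausted; retry via a map client */
    }
    /* Only plen bytes were mapped; this may be less than len. */
    memset(host, pattern, plen);
    address_space_unmap(as, host, plen, true, plen);
    return plen == len;
}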
3217
3218/* warning: addr must be aligned */
3219static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3220                                                  MemTxAttrs attrs,
3221                                                  MemTxResult *result,
3222                                                  enum device_endian endian)
3223{
3224    uint8_t *ptr;
3225    uint64_t val;
3226    MemoryRegion *mr;
3227    hwaddr l = 4;
3228    hwaddr addr1;
3229    MemTxResult r;
3230    bool release_lock = false;
3231
3232    rcu_read_lock();
3233    mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
3234    if (l < 4 || !memory_access_is_direct(mr, false)) {
3235        release_lock |= prepare_mmio_access(mr);
3236
3237        /* I/O case */
3238        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3239#if defined(TARGET_WORDS_BIGENDIAN)
3240        if (endian == DEVICE_LITTLE_ENDIAN) {
3241            val = bswap32(val);
3242        }
3243#else
3244        if (endian == DEVICE_BIG_ENDIAN) {
3245            val = bswap32(val);
3246        }
3247#endif
3248    } else {
3249        /* RAM case */
3250        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3251        switch (endian) {
3252        case DEVICE_LITTLE_ENDIAN:
3253            val = ldl_le_p(ptr);
3254            break;
3255        case DEVICE_BIG_ENDIAN:
3256            val = ldl_be_p(ptr);
3257            break;
3258        default:
3259            val = ldl_p(ptr);
3260            break;
3261        }
3262        r = MEMTX_OK;
3263    }
3264    if (result) {
3265        *result = r;
3266    }
3267    if (release_lock) {
3268        qemu_mutex_unlock_iothread();
3269    }
3270    rcu_read_unlock();
3271    return val;
3272}
3273
3274uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3275                           MemTxAttrs attrs, MemTxResult *result)
3276{
3277    return address_space_ldl_internal(as, addr, attrs, result,
3278                                      DEVICE_NATIVE_ENDIAN);
3279}
3280
3281uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3282                              MemTxAttrs attrs, MemTxResult *result)
3283{
3284    return address_space_ldl_internal(as, addr, attrs, result,
3285                                      DEVICE_LITTLE_ENDIAN);
3286}
3287
3288uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3289                              MemTxAttrs attrs, MemTxResult *result)
3290{
3291    return address_space_ldl_internal(as, addr, attrs, result,
3292                                      DEVICE_BIG_ENDIAN);
3293}
3294
3295uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3296{
3297    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3298}
3299
3300uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3301{
3302    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3303}
3304
3305uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3306{
3307    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3308}
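
/*
 * Illustrative sketch, not part of the build: the ldl_*_phys() helpers above
 * wrap address_space_ldl_*() with MEMTXATTRS_UNSPECIFIED and discard the
 * MemTxResult.  A caller that needs error reporting would use the
 * address_space_*() form directly, as in this hypothetical example_* helper.
 */
static uint32_t example_ldl_le_checked(AddressSpace *as, hwaddr addr,
                                       bool *ok)
{
    MemTxResult res;
    uint32_t val = address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED,
                                        &res);

    *ok = (res == MEMTX_OK);
    return val;
}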
3309
3310/* warning: addr must be aligned */
3311static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3312                                                  MemTxAttrs attrs,
3313                                                  MemTxResult *result,
3314                                                  enum device_endian endian)
3315{
3316    uint8_t *ptr;
3317    uint64_t val;
3318    MemoryRegion *mr;
3319    hwaddr l = 8;
3320    hwaddr addr1;
3321    MemTxResult r;
3322    bool release_lock = false;
3323
3324    rcu_read_lock();
3325    mr = address_space_translate_attr(as, addr, &addr1, &l,
3326                                 false, &attrs);
3327    if (l < 8 || !memory_access_is_direct(mr, false)) {
3328        release_lock |= prepare_mmio_access(mr);
3329
3330        /* I/O case */
3331        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3332#if defined(TARGET_WORDS_BIGENDIAN)
3333        if (endian == DEVICE_LITTLE_ENDIAN) {
3334            val = bswap64(val);
3335        }
3336#else
3337        if (endian == DEVICE_BIG_ENDIAN) {
3338            val = bswap64(val);
3339        }
3340#endif
3341    } else {
3342        /* RAM case */
3343        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3344        switch (endian) {
3345        case DEVICE_LITTLE_ENDIAN:
3346            val = ldq_le_p(ptr);
3347            break;
3348        case DEVICE_BIG_ENDIAN:
3349            val = ldq_be_p(ptr);
3350            break;
3351        default:
3352            val = ldq_p(ptr);
3353            break;
3354        }
3355        r = MEMTX_OK;
3356    }
3357    if (result) {
3358        *result = r;
3359    }
3360    if (release_lock) {
3361        qemu_mutex_unlock_iothread();
3362    }
3363    rcu_read_unlock();
3364    return val;
3365}
3366
3367uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3368                           MemTxAttrs attrs, MemTxResult *result)
3369{
3370    return address_space_ldq_internal(as, addr, attrs, result,
3371                                      DEVICE_NATIVE_ENDIAN);
3372}
3373
3374uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3375                           MemTxAttrs attrs, MemTxResult *result)
3376{
3377    return address_space_ldq_internal(as, addr, attrs, result,
3378                                      DEVICE_LITTLE_ENDIAN);
3379}
3380
3381uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3382                           MemTxAttrs attrs, MemTxResult *result)
3383{
3384    return address_space_ldq_internal(as, addr, attrs, result,
3385                                      DEVICE_BIG_ENDIAN);
3386}
3387
3388uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3389{
3390    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3391}
3392
3393uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3394{
3395    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3396}
3397
3398uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3399{
3400    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3401}
3402
3403/* XXX: optimize */
3404uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3405                            MemTxAttrs attrs, MemTxResult *result)
3406{
3407    uint8_t val;
3408    MemTxResult r;
3409
3410    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3411    if (result) {
3412        *result = r;
3413    }
3414    return val;
3415}
3416
3417uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3418{
3419    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3420}
3421
3422/* warning: addr must be aligned */
3423static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3424                                                   hwaddr addr,
3425                                                   MemTxAttrs attrs,
3426                                                   MemTxResult *result,
3427                                                   enum device_endian endian)
3428{
3429    uint8_t *ptr;
3430    uint64_t val;
3431    MemoryRegion *mr;
3432    hwaddr l = 2;
3433    hwaddr addr1;
3434    MemTxResult r;
3435    bool release_lock = false;
3436
3437    rcu_read_lock();
3438    mr = address_space_translate_attr(as, addr, &addr1, &l,
3439                                 false, &attrs);
3440    if (l < 2 || !memory_access_is_direct(mr, false)) {
3441        release_lock |= prepare_mmio_access(mr);
3442
3443        /* I/O case */
3444        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3445#if defined(TARGET_WORDS_BIGENDIAN)
3446        if (endian == DEVICE_LITTLE_ENDIAN) {
3447            val = bswap16(val);
3448        }
3449#else
3450        if (endian == DEVICE_BIG_ENDIAN) {
3451            val = bswap16(val);
3452        }
3453#endif
3454    } else {
3455        /* RAM case */
3456        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3457        switch (endian) {
3458        case DEVICE_LITTLE_ENDIAN:
3459            val = lduw_le_p(ptr);
3460            break;
3461        case DEVICE_BIG_ENDIAN:
3462            val = lduw_be_p(ptr);
3463            break;
3464        default:
3465            val = lduw_p(ptr);
3466            break;
3467        }
3468        r = MEMTX_OK;
3469    }
3470    if (result) {
3471        *result = r;
3472    }
3473    if (release_lock) {
3474        qemu_mutex_unlock_iothread();
3475    }
3476    rcu_read_unlock();
3477    return val;
3478}
3479
3480uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3481                           MemTxAttrs attrs, MemTxResult *result)
3482{
3483    return address_space_lduw_internal(as, addr, attrs, result,
3484                                       DEVICE_NATIVE_ENDIAN);
3485}
3486
3487uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3488                           MemTxAttrs attrs, MemTxResult *result)
3489{
3490    return address_space_lduw_internal(as, addr, attrs, result,
3491                                       DEVICE_LITTLE_ENDIAN);
3492}
3493
3494uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3495                           MemTxAttrs attrs, MemTxResult *result)
3496{
3497    return address_space_lduw_internal(as, addr, attrs, result,
3498                                       DEVICE_BIG_ENDIAN);
3499}
3500
3501uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3502{
3503    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3504}
3505
3506uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3507{
3508    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3509}
3510
3511uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3512{
3513    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3514}
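/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): the
 * _le/_be variants fix the byte order the *device* defines, independently of
 * TARGET_WORDS_BIGENDIAN.  Hypothetical example: a descriptor format with a
 * 16-bit big-endian length field at offset 6.
 */
#if 0
static uint16_t example_read_desc_len(AddressSpace *as, hwaddr desc_base)
{
    return address_space_lduw_be(as, desc_base + 6,
                                 MEMTXATTRS_UNSPECIFIED, NULL);
}
#endif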
3515
3516/* warning: addr must be aligned. The RAM page is not marked dirty in the
3517   code bitmap and no translated code for it is invalidated. This is useful
3518   when the dirty bits are used to track modified PTEs. */
3519void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3520                                MemTxAttrs attrs, MemTxResult *result)
3521{
3522    uint8_t *ptr;
3523    MemoryRegion *mr;
3524    hwaddr l = 4;
3525    hwaddr addr1;
3526    MemTxResult r;
3527    uint8_t dirty_log_mask;
3528    bool release_lock = false;
3529
3530    rcu_read_lock();
3531    mr = address_space_translate_attr(as, addr, &addr1, &l,
3532                                 true, &attrs);
3533    if (l < 4 || !memory_access_is_direct(mr, true)) {
3534        release_lock |= prepare_mmio_access(mr);
3535
3536        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3537    } else {
3538        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3539        stl_p(ptr, val);
3540
3541        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3542        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3543        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3544                                            4, dirty_log_mask);
3545        r = MEMTX_OK;
3546    }
3547    if (result) {
3548        *result = r;
3549    }
3550    if (release_lock) {
3551        qemu_mutex_unlock_iothread();
3552    }
3553    rcu_read_unlock();
3554}
3555
3556void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3557{
3558    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3559}
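/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): the
 * typical caller is an MMU page-table walker updating accessed/dirty bits in
 * a guest PTE, where skipping the code-dirty bookkeeping and TB invalidation
 * of a normal store is exactly what is wanted.  example_set_pte_flag() is a
 * made-up helper name.
 */
#if 0
static void example_set_pte_flag(AddressSpace *as, hwaddr pte_addr,
                                 uint32_t pte, uint32_t flag)
{
    if (!(pte & flag)) {
        stl_phys_notdirty(as, pte_addr, pte | flag);
    }
}
#endif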
3560
3561/* warning: addr must be aligned */
3562static inline void address_space_stl_internal(AddressSpace *as,
3563                                              hwaddr addr, uint32_t val,
3564                                              MemTxAttrs attrs,
3565                                              MemTxResult *result,
3566                                              enum device_endian endian)
3567{
3568    uint8_t *ptr;
3569    MemoryRegion *mr;
3570    hwaddr l = 4;
3571    hwaddr addr1;
3572    MemTxResult r;
3573    bool release_lock = false;
3574
3575    rcu_read_lock();
3576    mr = address_space_translate_attr(as, addr, &addr1, &l,
3577                                 true, &attrs);
3578    if (l < 4 || !memory_access_is_direct(mr, true)) {
3579        release_lock |= prepare_mmio_access(mr);
3580
3581#if defined(TARGET_WORDS_BIGENDIAN)
3582        if (endian == DEVICE_LITTLE_ENDIAN) {
3583            val = bswap32(val);
3584        }
3585#else
3586        if (endian == DEVICE_BIG_ENDIAN) {
3587            val = bswap32(val);
3588        }
3589#endif
3590        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3591    } else {
3592        /* RAM case */
3593        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3594        switch (endian) {
3595        case DEVICE_LITTLE_ENDIAN:
3596            stl_le_p(ptr, val);
3597            break;
3598        case DEVICE_BIG_ENDIAN:
3599            stl_be_p(ptr, val);
3600            break;
3601        default:
3602            stl_p(ptr, val);
3603            break;
3604        }
3605        invalidate_and_set_dirty(mr, addr1, 4);
3606        r = MEMTX_OK;
3607    }
3608    if (result) {
3609        *result = r;
3610    }
3611    if (release_lock) {
3612        qemu_mutex_unlock_iothread();
3613    }
3614    rcu_read_unlock();
3615}
3616
3617void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3618                       MemTxAttrs attrs, MemTxResult *result)
3619{
3620    address_space_stl_internal(as, addr, val, attrs, result,
3621                               DEVICE_NATIVE_ENDIAN);
3622}
3623
3624void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3625                       MemTxAttrs attrs, MemTxResult *result)
3626{
3627    address_space_stl_internal(as, addr, val, attrs, result,
3628                               DEVICE_LITTLE_ENDIAN);
3629}
3630
3631void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3632                       MemTxAttrs attrs, MemTxResult *result)
3633{
3634    address_space_stl_internal(as, addr, val, attrs, result,
3635                               DEVICE_BIG_ENDIAN);
3636}
3637
3638void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3639{
3640    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3641}
3642
3643void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3644{
3645    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3646}
3647
3648void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3649{
3650    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3651}
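/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): a
 * read-modify-write of a 32-bit little-endian word in guest memory, pairing
 * address_space_stl_le() with its address_space_ldl_le() counterpart defined
 * alongside the other loads earlier in this file.
 */
#if 0
static void example_set_bits_le32(AddressSpace *as, hwaddr addr, uint32_t mask)
{
    uint32_t v = address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);

    address_space_stl_le(as, addr, v | mask, MEMTXATTRS_UNSPECIFIED, NULL);
}
#endif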
3652
3653/* XXX: optimize */
3654void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3655                       MemTxAttrs attrs, MemTxResult *result)
3656{
3657    uint8_t v = val;
3658    MemTxResult r;
3659
3660    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3661    if (result) {
3662        *result = r;
3663    }
3664}
3665
3666void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3667{
3668    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3669}
3670
3671/* warning: addr must be aligned */
3672static inline void address_space_stw_internal(AddressSpace *as,
3673                                              hwaddr addr, uint32_t val,
3674                                              MemTxAttrs attrs,
3675                                              MemTxResult *result,
3676                                              enum device_endian endian)
3677{
3678    uint8_t *ptr;
3679    MemoryRegion *mr;
3680    hwaddr l = 2;
3681    hwaddr addr1;
3682    MemTxResult r;
3683    bool release_lock = false;
3684
3685    rcu_read_lock();
3686    mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
3687    if (l < 2 || !memory_access_is_direct(mr, true)) {
3688        release_lock |= prepare_mmio_access(mr);
3689
3690#if defined(TARGET_WORDS_BIGENDIAN)
3691        if (endian == DEVICE_LITTLE_ENDIAN) {
3692            val = bswap16(val);
3693        }
3694#else
3695        if (endian == DEVICE_BIG_ENDIAN) {
3696            val = bswap16(val);
3697        }
3698#endif
3699        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3700    } else {
3701        /* RAM case */
3702        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3703        switch (endian) {
3704        case DEVICE_LITTLE_ENDIAN:
3705            stw_le_p(ptr, val);
3706            break;
3707        case DEVICE_BIG_ENDIAN:
3708            stw_be_p(ptr, val);
3709            break;
3710        default:
3711            stw_p(ptr, val);
3712            break;
3713        }
3714        invalidate_and_set_dirty(mr, addr1, 2);
3715        r = MEMTX_OK;
3716    }
3717    if (result) {
3718        *result = r;
3719    }
3720    if (release_lock) {
3721        qemu_mutex_unlock_iothread();
3722    }
3723    rcu_read_unlock();
3724}
3725
3726void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3727                       MemTxAttrs attrs, MemTxResult *result)
3728{
3729    address_space_stw_internal(as, addr, val, attrs, result,
3730                               DEVICE_NATIVE_ENDIAN);
3731}
3732
3733void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3734                       MemTxAttrs attrs, MemTxResult *result)
3735{
3736    address_space_stw_internal(as, addr, val, attrs, result,
3737                               DEVICE_LITTLE_ENDIAN);
3738}
3739
3740void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3741                       MemTxAttrs attrs, MemTxResult *result)
3742{
3743    address_space_stw_internal(as, addr, val, attrs, result,
3744                               DEVICE_BIG_ENDIAN);
3745}
3746
3747void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3748{
3749    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3750}
3751
3752void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3753{
3754    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3755}
3756
3757void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3758{
3759    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3760}
3761
3762/* XXX: optimize */
3763void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3764                       MemTxAttrs attrs, MemTxResult *result)
3765{
3766    MemTxResult r;
3767    val = tswap64(val);
3768    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3769    if (result) {
3770        *result = r;
3771    }
3772}
3773
3774void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3775                       MemTxAttrs attrs, MemTxResult *result)
3776{
3777    MemTxResult r;
3778    val = cpu_to_le64(val);
3779    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3780    if (result) {
3781        *result = r;
3782    }
3783}

3784void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3785                       MemTxAttrs attrs, MemTxResult *result)
3786{
3787    MemTxResult r;
3788    val = cpu_to_be64(val);
3789    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3790    if (result) {
3791        *result = r;
3792    }
3793}
3794
3795void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3796{
3797    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3798}
3799
3800void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3801{
3802    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3803}
3804
3805void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3806{
3807    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3808}
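/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): unlike
 * the 32-bit helpers, the stq family above byte-swaps into a local and goes
 * through address_space_rw(); checking the transaction result is the same.
 */
#if 0
static bool example_write_u64_be(AddressSpace *as, hwaddr addr, uint64_t val)
{
    MemTxResult res;

    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, &res);
    return res == MEMTX_OK;
}
#endif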
3809
3810/* virtual memory access for debug (includes writing to ROM) */
3811int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3812                        uint8_t *buf, int len, int is_write)
3813{
3814    int l;
3815    hwaddr phys_addr;
3816    target_ulong page;
3817
3818    while (len > 0) {
3819        int asidx;
3820        MemTxAttrs attrs;
3821
3822        page = addr & TARGET_PAGE_MASK;
3823        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3824        asidx = cpu_asidx_from_attrs(cpu, attrs);
3825        /* if no physical page mapped, return an error */
3826        if (phys_addr == -1)
3827            return -1;
3828        l = (page + TARGET_PAGE_SIZE) - addr;
3829        if (l > len)
3830            l = len;
3831        phys_addr += (addr & ~TARGET_PAGE_MASK);
3832        if (is_write) {
3833            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3834                                          phys_addr, buf, l);
3835        } else {
3836            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3837                             MEMTXATTRS_UNSPECIFIED,
3838                             buf, l, 0);
3839        }
3840        len -= l;
3841        buf += l;
3842        addr += l;
3843    }
3844    return 0;
3845}
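/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): a
 * debugger-style caller reading a NUL-terminated guest string byte by byte,
 * stopping at the first unmapped page.  example_read_guest_cstring() is a
 * made-up helper name.
 */
#if 0
static int example_read_guest_cstring(CPUState *cpu, target_ulong vaddr,
                                      char *buf, int maxlen)
{
    int i;

    if (maxlen <= 0) {
        return -1;
    }
    for (i = 0; i < maxlen - 1; i++) {
        if (cpu_memory_rw_debug(cpu, vaddr + i, (uint8_t *)&buf[i], 1, 0)) {
            return -1;  /* no physical page mapped */
        }
        if (buf[i] == '\0') {
            return i;
        }
    }
    buf[i] = '\0';
    return i;
}
#endif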
3846
3847/*
3848 * Allows code that needs to deal with migration bitmaps etc. to still be
3849 * built target-independent.
3850 */
3851size_t qemu_target_page_bits(void)
3852{
3853    return TARGET_PAGE_BITS;
3854}
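/*
 * Usage sketch (illustrative only, kept out of the build with #if 0):
 * target-independent code can derive the page size from the bit count, e.g.
 * to work out how many target pages cover a byte range.
 */
#if 0
static uint64_t example_bytes_to_target_pages(uint64_t bytes)
{
    size_t bits = qemu_target_page_bits();

    return (bytes + (1ULL << bits) - 1) >> bits;
}
#endif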
3855
3856#endif
3857
3858/*
3859 * A helper function for the _utterly broken_ virtio device model to find out
3860 * if it's running on a big-endian machine. Don't do this at home, kids!
3861 */
3862bool target_words_bigendian(void);
3863bool target_words_bigendian(void)
3864{
3865#if defined(TARGET_WORDS_BIGENDIAN)
3866    return true;
3867#else
3868    return false;
3869#endif
3870}
3871
3872#ifndef CONFIG_USER_ONLY
3873bool cpu_physical_memory_is_io(hwaddr phys_addr)
3874{
3875    MemoryRegion *mr;
3876    hwaddr l = 1;
3877    bool res;
3878
3879    rcu_read_lock();
3880    mr = address_space_translate(&address_space_memory,
3881                                 phys_addr, &phys_addr, &l, false);
3882
3883    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3884    rcu_read_unlock();
3885    return res;
3886}
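/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): e.g.
 * rejecting a guest-supplied DMA address that points at MMIO before trying a
 * zero-copy mapping of the region.  example_validate_dma_target() is a
 * made-up helper name.
 */
#if 0
static bool example_validate_dma_target(hwaddr guest_paddr)
{
    if (cpu_physical_memory_is_io(guest_paddr)) {
        error_report("DMA target 0x%" HWADDR_PRIx " is not RAM", guest_paddr);
        return false;
    }
    return true;
}
#endif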
3887
3888int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3889{
3890    RAMBlock *block;
3891    int ret = 0;
3892
3893    rcu_read_lock();
3894    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3895        ret = func(block->idstr, block->host, block->offset,
3896                   block->used_length, opaque);
3897        if (ret) {
3898            break;
3899        }
3900    }
3901    rcu_read_unlock();
3902    return ret;
3903}
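/*
 * Usage sketch (illustrative only, kept out of the build with #if 0): a
 * callback that tallies the used length of every RAM block; the walk stops
 * as soon as a callback returns non-zero.  The parameter types below are
 * assumed to match the RAMBlockIterFunc typedef implied by the call above
 * (id string, host pointer, offset, used length, opaque).
 */
#if 0
static int example_sum_block(const char *idstr, void *host,
                             ram_addr_t offset, ram_addr_t length,
                             void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0;
}

static uint64_t example_total_used_ram(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_sum_block, &total);
    return total;
}
#endif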
3904#endif
3905
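/* Re-evaluate whether the CPU should be held halted: if any of the reset,
 * halt or architectural halt inputs is asserted, raise CPU_INTERRUPT_HALT,
 * otherwise clear it.
 */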
3906void cpu_halt_update(CPUState *cpu)
3907{
3908    bool val;
3909
3910    val = cpu->reset_pin || cpu->halt_pin || cpu->arch_halt_pin;
3911
3912    if (val) {
3913        cpu_interrupt(cpu, CPU_INTERRUPT_HALT);
3914    } else {
3915        cpu_reset_interrupt(cpu, CPU_INTERRUPT_HALT);
3916    }
3917}
3918
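/* GPIO input handler for the CPU reset line: any change of level resets the
 * CPU, the new level is latched, and the halt state is re-evaluated so the
 * CPU stays halted for as long as reset is held asserted.
 */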
3919void cpu_reset_gpio(void *opaque, int irq, int level)
3920{
3921    CPUState *cpu = CPU(opaque);
3922
3923    if (level == cpu->reset_pin) {
3924        return;
3925    }
3926
3927    if (level || cpu->reset_pin) {
3928        cpu_reset(cpu);
3929    }
3930
3931    cpu->reset_pin = level;
3932    cpu_halt_update(cpu);
3933}
3934
3935void cpu_halt_gpio(void *opaque, int irq, int level)
3936{
3937    CPUState *cpu = CPU(opaque);
3938
3939    cpu->halt_pin = level;
3940    cpu_halt_update(cpu);
3941}
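/*
 * Wiring sketch (illustrative only, kept out of the build with #if 0): a
 * board model could expose the handlers above as GPIO inputs driven by a
 * system controller.  "reset_out" and "halt_out" are made-up output names;
 * qemu_allocate_irq() and qdev_connect_gpio_out_named() are the usual qdev
 * plumbing for this.
 */
#if 0
static void example_wire_cpu_control(CPUState *cpu, DeviceState *sysctl)
{
    qemu_irq reset = qemu_allocate_irq(cpu_reset_gpio, cpu, 0);
    qemu_irq halt = qemu_allocate_irq(cpu_halt_gpio, cpu, 0);

    qdev_connect_gpio_out_named(sysctl, "reset_out", 0, reset);
    qdev_connect_gpio_out_named(sysctl, "halt_out", 0, halt);
}
#endif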
3942