qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "qapi/error.h"
  21#ifndef _WIN32
  22#include <sys/mman.h>
  23#endif
  24
  25#include "qemu/cutils.h"
  26#include "cpu.h"
  27#include "tcg.h"
  28#include "hw/hw.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#endif
  32#include "hw/qdev.h"
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "hw/xen/xen.h"
  36#include "qemu/timer.h"
  37#include "qemu/config-file.h"
  38#include "qemu/error-report.h"
  39#include "exec/memory.h"
  40#include "sysemu/dma.h"
  41#include "exec/address-spaces.h"
  42#if defined(CONFIG_USER_ONLY)
  43#include <qemu.h>
  44#else /* !CONFIG_USER_ONLY */
  45#include "sysemu/xen-mapcache.h"
  46#include "trace.h"
  47#endif
  48#include "exec/cpu-all.h"
  49#include "qemu/rcu_queue.h"
  50#include "qemu/main-loop.h"
  51#include "translate-all.h"
  52#include "sysemu/replay.h"
  53
  54#include "exec/memory-internal.h"
  55#include "exec/ram_addr.h"
  56#include "exec/log.h"
  57
  58#include "qemu/range.h"
  59#ifndef _WIN32
  60#include "qemu/mmap-alloc.h"
  61#endif
  62#ifdef _WIN32
  63#include <io.h>
  64#endif
  65
  66//#define DEBUG_SUBPAGE
  67
  68#if !defined(CONFIG_USER_ONLY)
  69/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  70 * are protected by the ramlist lock.
  71 */
  72RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  73
  74static MemoryRegion *system_memory;
  75static MemoryRegion *system_io;
  76
  77AddressSpace address_space_io;
  78AddressSpace address_space_memory;
  79
  80MemoryRegion io_mem_rom, io_mem_notdirty;
  81static MemoryRegion io_mem_unassigned;
  82
  83/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  84#define RAM_PREALLOC   (1 << 0)
  85
  86/* RAM is mmap-ed with MAP_SHARED */
  87#define RAM_SHARED     (1 << 1)
  88
  89/* Only a portion of RAM (used_length) is actually used, and migrated.
  90 * This used_length size can change across reboots.
  91 */
  92#define RAM_RESIZEABLE (1 << 2)
  93
  94/* RAM is backed by an mmapped file.
  95 */
  96#define RAM_FILE (1 << 3)
  97#endif
  98
  99struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
 100/* current CPU in the current thread. It is only valid inside
 101   cpu_exec() */
 102__thread CPUState *current_cpu;
 103/* 0 = Do not count executed instructions.
 104   1 = Precise instruction counting.
 105   2 = Adaptive rate instruction counting.  */
 106int use_icount;
 107
 108#if !defined(CONFIG_USER_ONLY)
 109
 110typedef struct PhysPageEntry PhysPageEntry;
 111
 112struct PhysPageEntry {
  113    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
  114    uint32_t skip : 6;
  115    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 116    uint32_t ptr : 26;
 117};
 118
 119#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 120
 121/* Size of the L2 (and L3, etc) page tables.  */
 122#define ADDR_SPACE_BITS 64
 123
 124#define P_L2_BITS 9
 125#define P_L2_SIZE (1 << P_L2_BITS)
 126
 127#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 128
 129typedef PhysPageEntry Node[P_L2_SIZE];
 130
 131typedef struct PhysPageMap {
 132    struct rcu_head rcu;
 133
 134    unsigned sections_nb;
 135    unsigned sections_nb_alloc;
 136    unsigned nodes_nb;
 137    unsigned nodes_nb_alloc;
 138    Node *nodes;
 139    MemoryRegionSection *sections;
 140} PhysPageMap;
 141
 142struct AddressSpaceDispatch {
 143    struct rcu_head rcu;
 144
 145    MemoryRegionSection *mru_section;
 146    /* This is a multi-level map on the physical address space.
 147     * The bottom level has pointers to MemoryRegionSections.
 148     */
 149    PhysPageEntry phys_map;
 150    PhysPageMap map;
 151    AddressSpace *as;
 152};
 153
 154#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 155typedef struct subpage_t {
 156    MemoryRegion iomem;
 157    AddressSpace *as;
 158    hwaddr base;
 159    uint16_t sub_section[TARGET_PAGE_SIZE];
 160} subpage_t;
 161
 162#define PHYS_SECTION_UNASSIGNED 0
 163#define PHYS_SECTION_NOTDIRTY 1
 164#define PHYS_SECTION_ROM 2
 165#define PHYS_SECTION_WATCH 3
 166
 167static void io_mem_init(void);
 168static void memory_map_init(void);
 169static void tcg_commit(MemoryListener *listener);
 170
 171static MemoryRegion io_mem_watch;
 172
 173/**
 174 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 175 * @cpu: the CPU whose AddressSpace this is
 176 * @as: the AddressSpace itself
 177 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 178 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 179 */
 180struct CPUAddressSpace {
 181    CPUState *cpu;
 182    AddressSpace *as;
 183    struct AddressSpaceDispatch *memory_dispatch;
 184    MemoryListener tcg_as_listener;
 185};
 186
 187#endif
 188
 189#if !defined(CONFIG_USER_ONLY)
 190
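/* Ensure @map has room for @nodes more nodes, growing the array geometrically. */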
 191static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 192{
 193    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 194        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 195        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 196        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 197    }
 198}
 199
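/* Allocate one node from @map and initialize all of its entries: a leaf node
 * points every entry at the unassigned section, an inner node at NIL.
 */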
 200static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 201{
 202    unsigned i;
 203    uint32_t ret;
 204    PhysPageEntry e;
 205    PhysPageEntry *p;
 206
 207    ret = map->nodes_nb++;
 208    p = map->nodes[ret];
 209    assert(ret != PHYS_MAP_NODE_NIL);
 210    assert(ret != map->nodes_nb_alloc);
 211
 212    e.skip = leaf ? 0 : 1;
 213    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 214    for (i = 0; i < P_L2_SIZE; ++i) {
 215        memcpy(&p[i], &e, sizeof(e));
 216    }
 217    return ret;
 218}
 219
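/* Recursively map [*index, *index + *nb) pages to @leaf: whole subtrees that
 * are covered by the range are written directly at this level, anything else
 * descends one level.
 */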
 220static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 221                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 222                                int level)
 223{
 224    PhysPageEntry *p;
 225    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 226
 227    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 228        lp->ptr = phys_map_node_alloc(map, level == 0);
 229    }
 230    p = map->nodes[lp->ptr];
 231    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 232
 233    while (*nb && lp < &p[P_L2_SIZE]) {
 234        if ((*index & (step - 1)) == 0 && *nb >= step) {
 235            lp->skip = 0;
 236            lp->ptr = leaf;
 237            *index += step;
 238            *nb -= step;
 239        } else {
 240            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 241        }
 242        ++lp;
 243    }
 244}
 245
 246static void phys_page_set(AddressSpaceDispatch *d,
 247                          hwaddr index, hwaddr nb,
 248                          uint16_t leaf)
 249{
 250    /* Wildly overreserve - it doesn't matter much. */
 251    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 252
 253    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 254}
 255
  256/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 257 * and update our entry so we can skip it and go directly to the destination.
 258 */
 259static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 260{
 261    unsigned valid_ptr = P_L2_SIZE;
 262    int valid = 0;
 263    PhysPageEntry *p;
 264    int i;
 265
 266    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 267        return;
 268    }
 269
 270    p = nodes[lp->ptr];
 271    for (i = 0; i < P_L2_SIZE; i++) {
 272        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 273            continue;
 274        }
 275
 276        valid_ptr = i;
 277        valid++;
 278        if (p[i].skip) {
 279            phys_page_compact(&p[i], nodes, compacted);
 280        }
 281    }
 282
 283    /* We can only compress if there's only one child. */
 284    if (valid != 1) {
 285        return;
 286    }
 287
 288    assert(valid_ptr < P_L2_SIZE);
 289
 290    /* Don't compress if it won't fit in the # of bits we have. */
 291    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 292        return;
 293    }
 294
 295    lp->ptr = p[valid_ptr].ptr;
 296    if (!p[valid_ptr].skip) {
 297        /* If our only child is a leaf, make this a leaf. */
 298        /* By design, we should have made this node a leaf to begin with so we
 299         * should never reach here.
 300         * But since it's so simple to handle this, let's do it just in case we
 301         * change this rule.
 302         */
 303        lp->skip = 0;
 304    } else {
 305        lp->skip += p[valid_ptr].skip;
 306    }
 307}
 308
 309static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 310{
 311    DECLARE_BITMAP(compacted, nodes_nb);
 312
 313    if (d->phys_map.skip) {
 314        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 315    }
 316}
 317
 318static inline bool section_covers_addr(const MemoryRegionSection *section,
 319                                       hwaddr addr)
 320{
 321    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 322     * the section must cover the entire address space.
 323     */
 324    return section->size.hi ||
 325           range_covers_byte(section->offset_within_address_space,
 326                             section->size.lo, addr);
 327}
 328
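/* Walk the radix tree from @lp and return the section covering @addr, or the
 * unassigned section if no leaf is reached.
 */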
 329static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 330                                           Node *nodes, MemoryRegionSection *sections)
 331{
 332    PhysPageEntry *p;
 333    hwaddr index = addr >> TARGET_PAGE_BITS;
 334    int i;
 335
 336    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 337        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 338            return &sections[PHYS_SECTION_UNASSIGNED];
 339        }
 340        p = nodes[lp.ptr];
 341        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 342    }
 343
 344    if (section_covers_addr(&sections[lp.ptr], addr)) {
 345        return &sections[lp.ptr];
 346    } else {
 347        return &sections[PHYS_SECTION_UNASSIGNED];
 348    }
 349}
 350
 351bool memory_region_is_unassigned(MemoryRegion *mr)
 352{
 353    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 354        && mr != &io_mem_watch;
 355}
 356
 357/* Called from RCU critical section */
 358static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 359                                                        hwaddr addr,
 360                                                        bool resolve_subpage)
 361{
 362    MemoryRegionSection *section = atomic_read(&d->mru_section);
 363    subpage_t *subpage;
 364    bool update;
 365
 366    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 367        section_covers_addr(section, addr)) {
 368        update = false;
 369    } else {
 370        section = phys_page_find(d->phys_map, addr, d->map.nodes,
 371                                 d->map.sections);
 372        update = true;
 373    }
 374    if (resolve_subpage && section->mr->subpage) {
 375        subpage = container_of(section->mr, subpage_t, iomem);
 376        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 377    }
 378    if (update) {
 379        atomic_set(&d->mru_section, section);
 380    }
 381    return section;
 382}
 383
 384/* Called from RCU critical section */
 385static MemoryRegionSection *
 386address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 387                                 hwaddr *plen, bool resolve_subpage)
 388{
 389    MemoryRegionSection *section;
 390    MemoryRegion *mr;
 391    Int128 diff;
 392
 393    section = address_space_lookup_region(d, addr, resolve_subpage);
 394    /* Compute offset within MemoryRegionSection */
 395    addr -= section->offset_within_address_space;
 396
 397    /* Compute offset within MemoryRegion */
 398    *xlat = addr + section->offset_within_region;
 399
 400    mr = section->mr;
 401
 402    /* MMIO registers can be expected to perform full-width accesses based only
 403     * on their address, without considering adjacent registers that could
 404     * decode to completely different MemoryRegions.  When such registers
 405     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 406     * regions overlap wildly.  For this reason we cannot clamp the accesses
 407     * here.
 408     *
 409     * If the length is small (as is the case for address_space_ldl/stl),
 410     * everything works fine.  If the incoming length is large, however,
 411     * the caller really has to do the clamping through memory_access_size.
 412     */
 413    if (memory_region_is_ram(mr)) {
 414        diff = int128_sub(section->size, int128_make64(addr));
 415        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 416    }
 417    return section;
 418}
 419
 420MemoryRegion *address_space_translate_attr(AddressSpace *as, hwaddr addr,
 421                                           hwaddr *xlat, hwaddr *plen,
 422                                           bool is_write,
 423                                           MemTxAttrs *attr)
 424{
 425    IOMMUTLBEntry iotlb;
 426    MemoryRegionSection *section;
 427    MemoryRegion *mr;
 428
 429    for (;;) {
 430        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 431        section = address_space_translate_internal(d, addr, &addr, plen, true);
 432        mr = section->mr;
 433
 434        if (!mr->iommu_ops) {
 435            break;
 436        }
 437
 438        if (mr->iommu_ops->translate_attr) {
 439            iotlb = mr->iommu_ops->translate_attr(mr, addr, is_write, attr);
 440        } else {
 441            iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 442        }
 443
 444        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 445                | (addr & iotlb.addr_mask));
 446        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 447        if (!(iotlb.perm & (1 << is_write))) {
 448            mr = &io_mem_unassigned;
 449            break;
 450        }
 451
 452        as = iotlb.target_as;
 453    }
 454
 455    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 456        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 457        *plen = MIN(page, *plen);
 458    }
 459
 460    *xlat = addr;
 461    return mr;
 462}
 463
 464MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 465                                      hwaddr *xlat, hwaddr *plen,
 466                                      bool is_write)
 467{
 468    MemTxAttrs attr = MEMTXATTRS_UNSPECIFIED;
 469    return address_space_translate_attr(as, addr, xlat, plen, is_write,
 470                                        &attr);
 471}
 472
 473/* Called from RCU critical section */
 474MemoryRegionSection *
 475address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 476                                  hwaddr *xlat, hwaddr *plen, int *prot,
 477                                  MemTxAttrs *attr)
 478{
 479    MemoryRegionSection *section;
 480    AddressSpace *as = cpu->cpu_ases[asidx].memory_dispatch->as;
 481
 482    IOMMUTLBEntry iotlb;
 483    struct {
 484        MemoryRegionSection *section;
 485        hwaddr addr;
 486        hwaddr len;
  487    } root = { .section = NULL, .addr = addr };
 488    AddressSpace *orig_as = as;
 489    MemoryRegion *mr;
 490    hwaddr len = *plen;
 491
 492    assert(prot);
 493
 494    while (1) {
 495        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 496        section = address_space_translate_internal(d, addr, &addr, plen, false);
 497        mr = section->mr;
 498
 499        if (!mr->iommu_ops) {
 500            break;
 501        }
 502
 503        /* FIXME: these are not necessarily accesses, so is_write doesn't make
 504           sense!  */
 505        if (mr->iommu_ops->translate_attr) {
 506            iotlb = mr->iommu_ops->translate_attr(mr, addr, false, attr);
 507        } else {
 508            iotlb = mr->iommu_ops->translate(mr, addr, false);
 509        }
 510        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 511                | (addr & iotlb.addr_mask));
 512        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
 513        as = iotlb.target_as;
 514
 515        if (!root.section && orig_as != as) {
 516            root.section = section;
 517            root.len = *plen;
 518        }
 519    }
 520
 521    *plen = len;
 522    *xlat = addr;
 523
 524    /* If the IOMMU translated addr into IO in a different AS, refer to
 525     * the IOMMU itself and do a slow translated access at access time.
 526     * TODO: If the iotlb could record dst AS, this wouldn't be needed.
 527     */
 528    if (!memory_region_is_ram(section->mr) && as != orig_as) {
 529        *plen = root.len;
 530        *xlat = root.addr;
 531        section = root.section;
 532    }
 533//    qemu_log("as=%p mr=%p addr=%lx len=%lx\n", as, section->mr, *xlat, *plen);
 534    return section;
 535}
 536#endif
 537
 538#if !defined(CONFIG_USER_ONLY)
 539
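/* Post-load fixups for the common CPU state: drop the obsolete
 * CPU_INTERRUPT_EXIT bit and flush the TLB, which is rebuilt on demand
 * rather than migrated.
 */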
 540static int cpu_common_post_load(void *opaque, int version_id)
 541{
 542    CPUState *cpu = opaque;
 543
 544    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 545       version_id is increased. */
 546    cpu->interrupt_request &= ~0x01;
 547    tlb_flush(cpu, 1);
 548
 549    return 0;
 550}
 551
 552static int cpu_common_pre_load(void *opaque)
 553{
 554    CPUState *cpu = opaque;
 555
 556    cpu->exception_index = -1;
 557
 558    return 0;
 559}
 560
 561static bool cpu_common_exception_index_needed(void *opaque)
 562{
 563    CPUState *cpu = opaque;
 564
 565    return tcg_enabled() && cpu->exception_index != -1;
 566}
 567
 568static const VMStateDescription vmstate_cpu_common_exception_index = {
 569    .name = "cpu_common/exception_index",
 570    .version_id = 1,
 571    .minimum_version_id = 1,
 572    .needed = cpu_common_exception_index_needed,
 573    .fields = (VMStateField[]) {
 574        VMSTATE_INT32(exception_index, CPUState),
 575        VMSTATE_END_OF_LIST()
 576    }
 577};
 578
 579static bool cpu_common_crash_occurred_needed(void *opaque)
 580{
 581    CPUState *cpu = opaque;
 582
 583    return cpu->crash_occurred;
 584}
 585
 586static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 587    .name = "cpu_common/crash_occurred",
 588    .version_id = 1,
 589    .minimum_version_id = 1,
 590    .needed = cpu_common_crash_occurred_needed,
 591    .fields = (VMStateField[]) {
 592        VMSTATE_BOOL(crash_occurred, CPUState),
 593        VMSTATE_END_OF_LIST()
 594    }
 595};
 596
 597const VMStateDescription vmstate_cpu_common = {
 598    .name = "cpu_common",
 599    .version_id = 1,
 600    .minimum_version_id = 1,
 601    .pre_load = cpu_common_pre_load,
 602    .post_load = cpu_common_post_load,
 603    .fields = (VMStateField[]) {
 604        VMSTATE_UINT32(halted, CPUState),
 605        VMSTATE_UINT32(interrupt_request, CPUState),
 606        VMSTATE_END_OF_LIST()
 607    },
 608    .subsections = (const VMStateDescription*[]) {
 609        &vmstate_cpu_common_exception_index,
 610        &vmstate_cpu_common_crash_occurred,
 611        NULL
 612    }
 613};
 614
 615#endif
 616
 617CPUState *qemu_get_cpu(int index)
 618{
 619    CPUState *cpu;
 620
 621    CPU_FOREACH(cpu) {
 622        if (cpu->cpu_index == index) {
 623            return cpu;
 624        }
 625    }
 626
 627    return NULL;
 628}
 629
 630#if !defined(CONFIG_USER_ONLY)
 631void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 632{
 633    CPUAddressSpace *newas;
 634
 635    /* Target code should have set num_ases before calling us */
 636    assert(asidx < cpu->num_ases);
 637
 638    if (asidx == 0) {
 639        /* address space 0 gets the convenience alias */
 640        cpu->as = as;
 641    }
 642
 643    /* KVM cannot currently support multiple address spaces. */
 644    assert(asidx == 0 || !kvm_enabled());
 645
 646    if (!cpu->cpu_ases) {
 647        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 648    }
 649
 650    newas = &cpu->cpu_ases[asidx];
 651    newas->cpu = cpu;
 652    newas->as = as;
 653    if (tcg_enabled()) {
 654        newas->tcg_as_listener.commit = tcg_commit;
 655        memory_listener_register(&newas->tcg_as_listener, as);
 656    }
 657}
 658
 659AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 660{
 661    /* Return the AddressSpace corresponding to the specified index */
 662    return cpu->cpu_ases[asidx].as;
 663}
 664#endif
 665
 666#ifndef CONFIG_USER_ONLY
 667static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 668
 669static int cpu_get_free_index(Error **errp)
 670{
 671    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 672
 673    if (cpu >= MAX_CPUMASK_BITS) {
 674        error_setg(errp, "Trying to use more CPUs than max of %d",
 675                   MAX_CPUMASK_BITS);
 676        return -1;
 677    }
 678
 679    bitmap_set(cpu_index_map, cpu, 1);
 680    return cpu;
 681}
 682
 683void cpu_exec_exit(CPUState *cpu)
 684{
 685    if (cpu->cpu_index == -1) {
 686        /* cpu_index was never allocated by this @cpu or was already freed. */
 687        return;
 688    }
 689
 690    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 691    cpu->cpu_index = -1;
 692}
 693#else
 694
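/* User-mode emulation: CPU indexes are never freed, so the next free index is
 * simply the number of CPUs created so far.
 */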
 695static int cpu_get_free_index(Error **errp)
 696{
 697    CPUState *some_cpu;
 698    int cpu_index = 0;
 699
 700    CPU_FOREACH(some_cpu) {
 701        cpu_index++;
 702    }
 703    return cpu_index;
 704}
 705
 706void cpu_exec_exit(CPUState *cpu)
 707{
 708}
 709#endif
 710
 711void cpu_exec_init(CPUState *cpu, Error **errp)
 712{
 713    CPUClass *cc = CPU_GET_CLASS(cpu);
 714    int cpu_index;
 715    Error *local_err = NULL;
 716
 717    cpu->as = NULL;
 718    cpu->num_ases = 0;
 719
 720#ifndef CONFIG_USER_ONLY
 721    cpu->thread_id = qemu_get_thread_id();
 722
 723    /* This is a softmmu CPU object, so create a property for it
 724     * so users can wire up its memory. (This can't go in qom/cpu.c
 725     * because that file is compiled only once for both user-mode
 726     * and system builds.) The default if no link is set up is to use
 727     * the system address space.
 728     */
 729    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 730                             (Object **)&cpu->memory,
 731                             qdev_prop_allow_set_link_before_realize,
 732                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
 733                             &error_abort);
 734    cpu->memory = system_memory;
 735    object_ref(OBJECT(cpu->memory));
 736#endif
 737
 738#if defined(CONFIG_USER_ONLY)
 739    cpu_list_lock();
 740#endif
 741    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
 742    if (local_err) {
 743        error_propagate(errp, local_err);
 744#if defined(CONFIG_USER_ONLY)
 745        cpu_list_unlock();
 746#endif
 747        return;
 748    }
 749    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 750#if defined(CONFIG_USER_ONLY)
 751    cpu_list_unlock();
 752#endif
 753    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 754        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 755    }
 756    if (cc->vmsd != NULL) {
 757        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 758    }
 759}
 760
 761#if defined(CONFIG_USER_ONLY)
 762static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 763{
 764    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 765}
 766#else
 767static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 768{
 769    MemTxAttrs attrs;
 770    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
 771    int asidx = cpu_asidx_from_attrs(cpu, attrs);
 772    if (phys != -1) {
 773        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
 774                                phys | (pc & ~TARGET_PAGE_MASK));
 775    }
 776}
 777#endif
 778
 779#if defined(CONFIG_USER_ONLY)
 780void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 781
 782{
 783}
 784
 785int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 786                          int flags)
 787{
 788    return -ENOSYS;
 789}
 790
 791void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 792{
 793}
 794
 795int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 796                          int flags, CPUWatchpoint **watchpoint)
 797{
 798    return -ENOSYS;
 799}
 800#else
 801/* Add a watchpoint.  */
 802int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 803                          int flags, CPUWatchpoint **watchpoint)
 804{
 805    CPUWatchpoint *wp;
 806
 807    /* forbid ranges which are empty or run off the end of the address space */
 808    if (len == 0 || (addr + len - 1) < addr) {
 809        error_report("tried to set invalid watchpoint at %"
 810                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 811        return -EINVAL;
 812    }
 813    wp = g_malloc(sizeof(*wp));
 814
 815    wp->vaddr = addr;
 816    wp->len = len;
 817    wp->flags = flags;
 818
 819    /* keep all GDB-injected watchpoints in front */
 820    if (flags & BP_GDB) {
 821        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 822    } else {
 823        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 824    }
 825
 826    tlb_flush_page(cpu, addr);
 827
 828    if (watchpoint)
 829        *watchpoint = wp;
 830    return 0;
 831}
 832
 833/* Remove a specific watchpoint.  */
 834int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 835                          int flags)
 836{
 837    CPUWatchpoint *wp;
 838
 839    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 840        if (addr == wp->vaddr && len == wp->len
 841                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 842            cpu_watchpoint_remove_by_ref(cpu, wp);
 843            return 0;
 844        }
 845    }
 846    return -ENOENT;
 847}
 848
 849/* Remove a specific watchpoint by reference.  */
 850void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 851{
 852    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 853
 854    tlb_flush_page(cpu, watchpoint->vaddr);
 855
 856    g_free(watchpoint);
 857}
 858
 859/* Remove all matching watchpoints.  */
 860void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 861{
 862    CPUWatchpoint *wp, *next;
 863
 864    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 865        if (wp->flags & mask) {
 866            cpu_watchpoint_remove_by_ref(cpu, wp);
 867        }
 868    }
 869}
 870
 871/* Return true if this watchpoint address matches the specified
 872 * access (ie the address range covered by the watchpoint overlaps
 873 * partially or completely with the address range covered by the
 874 * access).
 875 */
 876static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 877                                                  vaddr addr,
 878                                                  vaddr len)
 879{
 880    /* We know the lengths are non-zero, but a little caution is
 881     * required to avoid errors in the case where the range ends
 882     * exactly at the top of the address space and so addr + len
 883     * wraps round to zero.
 884     */
 885    vaddr wpend = wp->vaddr + wp->len - 1;
 886    vaddr addrend = addr + len - 1;
 887
 888    return !(addr > wpend || wp->vaddr > addrend);
 889}
 890
 891#endif
 892
 893/* Add a breakpoint.  */
 894int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 895                          CPUBreakpoint **breakpoint)
 896{
 897    CPUBreakpoint *bp;
 898
 899    bp = g_malloc(sizeof(*bp));
 900
 901    bp->pc = pc;
 902    bp->flags = flags;
 903
 904    /* keep all GDB-injected breakpoints in front */
 905    if (flags & BP_GDB) {
 906        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 907    } else {
 908        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 909    }
 910
 911    breakpoint_invalidate(cpu, pc);
 912
 913    if (breakpoint) {
 914        *breakpoint = bp;
 915    }
 916    return 0;
 917}
 918
 919/* Remove a specific breakpoint.  */
 920int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 921{
 922    CPUBreakpoint *bp;
 923
 924    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 925        if (bp->pc == pc && bp->flags == flags) {
 926            cpu_breakpoint_remove_by_ref(cpu, bp);
 927            return 0;
 928        }
 929    }
 930    return -ENOENT;
 931}
 932
 933/* Remove a specific breakpoint by reference.  */
 934void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 935{
 936    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 937
 938    breakpoint_invalidate(cpu, breakpoint->pc);
 939
 940    g_free(breakpoint);
 941}
 942
 943/* Remove all matching breakpoints. */
 944void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 945{
 946    CPUBreakpoint *bp, *next;
 947
 948    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 949        if (bp->flags & mask) {
 950            cpu_breakpoint_remove_by_ref(cpu, bp);
 951        }
 952    }
 953}
 954
 955/* enable or disable single step mode. EXCP_DEBUG is returned by the
 956   CPU loop after each instruction */
 957void cpu_single_step(CPUState *cpu, int enabled)
 958{
 959    if (cpu->singlestep_enabled != enabled) {
 960        cpu->singlestep_enabled = enabled;
 961        if (kvm_enabled()) {
 962            kvm_update_guest_debug(cpu, 0);
 963        } else {
 964            /* must flush all the translated code to avoid inconsistencies */
 965            /* XXX: only flush what is necessary */
 966            tb_flush(cpu);
 967        }
 968    }
 969}
 970
 971void cpu_abort(CPUState *cpu, const char *fmt, ...)
 972{
 973    va_list ap;
 974    va_list ap2;
 975
 976    va_start(ap, fmt);
 977    va_copy(ap2, ap);
 978    fprintf(stderr, "qemu: fatal: ");
 979    vfprintf(stderr, fmt, ap);
 980    fprintf(stderr, "\n");
 981    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 982    if (qemu_log_separate()) {
 983        qemu_log("qemu: fatal: ");
 984        qemu_log_vprintf(fmt, ap2);
 985        qemu_log("\n");
 986        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 987        qemu_log_flush();
 988        qemu_log_close();
 989    }
 990    va_end(ap2);
 991    va_end(ap);
 992    replay_finish();
 993#if defined(CONFIG_USER_ONLY)
 994    {
 995        struct sigaction act;
 996        sigfillset(&act.sa_mask);
 997        act.sa_handler = SIG_DFL;
 998        sigaction(SIGABRT, &act, NULL);
 999    }
1000#endif
1001    abort();
1002}
1003
1004#if !defined(CONFIG_USER_ONLY)
1005/* Called from RCU critical section */
1006static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
1007{
1008    RAMBlock *block;
1009
1010    block = atomic_rcu_read(&ram_list.mru_block);
1011    if (block && addr - block->offset < block->max_length) {
1012        return block;
1013    }
1014    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1015        if (addr - block->offset < block->max_length) {
1016            goto found;
1017        }
1018    }
1019
1020    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1021    abort();
1022
1023found:
1024    /* It is safe to write mru_block outside the iothread lock.  This
1025     * is what happens:
1026     *
1027     *     mru_block = xxx
1028     *     rcu_read_unlock()
1029     *                                        xxx removed from list
1030     *                  rcu_read_lock()
1031     *                  read mru_block
1032     *                                        mru_block = NULL;
1033     *                                        call_rcu(reclaim_ramblock, xxx);
1034     *                  rcu_read_unlock()
1035     *
1036     * atomic_rcu_set is not needed here.  The block was already published
1037     * when it was placed into the list.  Here we're just making an extra
1038     * copy of the pointer.
1039     */
1040    ram_list.mru_block = block;
1041    return block;
1042}
1043
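/* Re-arm dirty tracking in every CPU's TLB for [start, start + length) so the
 * next write to those pages takes the slow path and marks them dirty again.
 */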
1044static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1045{
1046    CPUState *cpu;
1047    ram_addr_t start1;
1048    RAMBlock *block;
1049    ram_addr_t end;
1050
1051    end = TARGET_PAGE_ALIGN(start + length);
1052    start &= TARGET_PAGE_MASK;
1053
1054    rcu_read_lock();
1055    block = qemu_get_ram_block(start);
1056    assert(block == qemu_get_ram_block(end - 1));
1057    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1058    CPU_FOREACH(cpu) {
1059        tlb_reset_dirty(cpu, start1, length);
1060    }
1061    rcu_read_unlock();
1062}
1063
1064/* Note: start and end must be within the same ram block.  */
1065bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1066                                              ram_addr_t length,
1067                                              unsigned client)
1068{
1069    DirtyMemoryBlocks *blocks;
1070    unsigned long end, page;
1071    bool dirty = false;
1072
1073    if (length == 0) {
1074        return false;
1075    }
1076
1077    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1078    page = start >> TARGET_PAGE_BITS;
1079
1080    rcu_read_lock();
1081
1082    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1083
1084    while (page < end) {
1085        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1086        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1087        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1088
1089        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1090                                              offset, num);
1091        page += num;
1092    }
1093
1094    rcu_read_unlock();
1095
1096    if (dirty && tcg_enabled()) {
1097        tlb_reset_dirty_range_all(start, length);
1098    }
1099
1100    return dirty;
1101}
1102
1103/* Called from RCU critical section */
1104hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1105                                       MemoryRegionSection *section,
1106                                       target_ulong vaddr,
1107                                       hwaddr paddr, hwaddr xlat,
1108                                       int prot,
1109                                       target_ulong *address)
1110{
1111    hwaddr iotlb;
1112    CPUWatchpoint *wp;
1113
1114    if (memory_region_is_ram(section->mr)) {
1115        /* Normal RAM.  */
1116        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1117        if (!section->readonly) {
1118            iotlb |= PHYS_SECTION_NOTDIRTY;
1119        } else {
1120            iotlb |= PHYS_SECTION_ROM;
1121        }
1122    } else {
1123        AddressSpaceDispatch *d;
1124
1125        d = atomic_rcu_read(&section->address_space->dispatch);
1126        iotlb = section - d->map.sections;
1127        iotlb += xlat;
1128    }
1129
1130    /* Make accesses to pages with watchpoints go via the
1131       watchpoint trap routines.  */
1132    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1133        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1134            /* Avoid trapping reads of pages with a write breakpoint. */
1135            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1136                iotlb = PHYS_SECTION_WATCH + paddr;
1137                *address |= TLB_MMIO;
1138                break;
1139            }
1140        }
1141    }
1142
1143    return iotlb;
1144}
1145#endif /* defined(CONFIG_USER_ONLY) */
1146
1147#if !defined(CONFIG_USER_ONLY)
1148
1149static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1150                             uint16_t section);
1151static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1152
1153static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1154                               qemu_anon_ram_alloc;
1155
1156/*
 1157 * Set a custom physical guest memory allocator.
1158 * Accelerators with unusual needs may need this.  Hopefully, we can
1159 * get rid of it eventually.
1160 */
1161void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1162{
1163    phys_mem_alloc = alloc;
1164}
1165
1166static uint16_t phys_section_add(PhysPageMap *map,
1167                                 MemoryRegionSection *section)
1168{
1169    /* The physical section number is ORed with a page-aligned
1170     * pointer to produce the iotlb entries.  Thus it should
1171     * never overflow into the page-aligned value.
1172     */
1173    assert(map->sections_nb < TARGET_PAGE_SIZE);
1174
1175    if (map->sections_nb == map->sections_nb_alloc) {
1176        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1177        map->sections = g_renew(MemoryRegionSection, map->sections,
1178                                map->sections_nb_alloc);
1179    }
1180    map->sections[map->sections_nb] = *section;
1181    memory_region_ref(section->mr);
1182    return map->sections_nb++;
1183}
1184
1185static void phys_section_destroy(MemoryRegion *mr)
1186{
1187    bool have_sub_page = mr->subpage;
1188
1189    memory_region_unref(mr);
1190
1191    if (have_sub_page) {
1192        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1193        object_unref(OBJECT(&subpage->iomem));
1194        g_free(subpage);
1195    }
1196}
1197
1198static void phys_sections_free(PhysPageMap *map)
1199{
1200    while (map->sections_nb > 0) {
1201        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1202        phys_section_destroy(section->mr);
1203    }
1204    g_free(map->sections);
1205    g_free(map->nodes);
1206}
1207
1208static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1209{
1210    subpage_t *subpage;
1211    hwaddr base = section->offset_within_address_space
1212        & TARGET_PAGE_MASK;
1213    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1214                                                   d->map.nodes, d->map.sections);
1215    MemoryRegionSection subsection = {
1216        .offset_within_address_space = base,
1217        .size = int128_make64(TARGET_PAGE_SIZE),
1218    };
1219    hwaddr start, end;
1220
1221    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1222
1223    if (!(existing->mr->subpage)) {
1224        subpage = subpage_init(d->as, base);
1225        subsection.address_space = d->as;
1226        subsection.mr = &subpage->iomem;
1227        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1228                      phys_section_add(&d->map, &subsection));
1229    } else {
1230        subpage = container_of(existing->mr, subpage_t, iomem);
1231    }
1232    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1233    end = start + int128_get64(section->size) - 1;
1234    subpage_register(subpage, start, end,
1235                     phys_section_add(&d->map, section));
1236}
1237
1238
1239static void register_multipage(AddressSpaceDispatch *d,
1240                               MemoryRegionSection *section)
1241{
1242    hwaddr start_addr = section->offset_within_address_space;
1243    uint16_t section_index = phys_section_add(&d->map, section);
1244    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1245                                                    TARGET_PAGE_BITS));
1246
1247    assert(num_pages);
1248    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1249}
1250
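/* MemoryListener region_add hook: register unaligned head and tail fragments
 * of the section as subpages and the page-aligned middle as full pages.
 */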
1251static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1252{
1253    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1254    AddressSpaceDispatch *d = as->next_dispatch;
1255    MemoryRegionSection now = *section, remain = *section;
1256    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1257
1258    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1259        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1260                       - now.offset_within_address_space;
1261
1262        now.size = int128_min(int128_make64(left), now.size);
1263        register_subpage(d, &now);
1264    } else {
1265        now.size = int128_zero();
1266    }
1267    while (int128_ne(remain.size, now.size)) {
1268        remain.size = int128_sub(remain.size, now.size);
1269        remain.offset_within_address_space += int128_get64(now.size);
1270        remain.offset_within_region += int128_get64(now.size);
1271        now = remain;
1272        if (int128_lt(remain.size, page_size)) {
1273            register_subpage(d, &now);
1274        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1275            now.size = page_size;
1276            register_subpage(d, &now);
1277        } else {
1278            now.size = int128_and(now.size, int128_neg(page_size));
1279            register_multipage(d, &now);
1280        }
1281    }
1282}
1283
1284void qemu_flush_coalesced_mmio_buffer(void)
1285{
1286    if (kvm_enabled())
1287        kvm_flush_coalesced_mmio_buffer();
1288}
1289
1290void qemu_mutex_lock_ramlist(void)
1291{
1292    qemu_mutex_lock(&ram_list.mutex);
1293}
1294
1295void qemu_mutex_unlock_ramlist(void)
1296{
1297    qemu_mutex_unlock(&ram_list.mutex);
1298}
1299
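/* Back guest RAM with a file: @path may name an existing file, a file to be
 * created, or a directory in which a temporary backing file is created.
 * Returns the mapped host address, or NULL on error.
 */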
1300static void *file_ram_alloc(RAMBlock *block,
1301                            ram_addr_t memory,
1302                            const char *path,
1303                            Error **errp)
1304{
1305    bool unlink_on_error = false;
1306    char *filename;
1307    char *sanitized_name;
1308    char *c;
1309    void *area;
1310    int fd = -1;
1311    int64_t page_size;
1312
1313
1314    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1315        error_setg(errp,
1316                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1317        return NULL;
1318    }
1319
1320    for (;;) {
1321        fd = open(path, O_RDWR);
1322        if (fd >= 0) {
1323            /* @path names an existing file, use it */
1324            break;
1325        }
1326        if (errno == ENOENT) {
1327            /* @path names a file that doesn't exist, create it */
1328            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1329            if (fd >= 0) {
1330                unlink_on_error = true;
1331                break;
1332            }
1333        } else if (errno == EISDIR) {
1334            /* @path names a directory, create a file there */
1335            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1336            sanitized_name = g_strdup(memory_region_name(block->mr));
1337            for (c = sanitized_name; *c != '\0'; c++) {
1338                if (*c == '/') {
1339                    *c = '_';
1340                }
1341            }
1342
 1343            filename = g_strdup_printf("%s" G_DIR_SEPARATOR_S
1344                                       "qemu_back_mem.%s.XXXXXX", path,
1345                                       sanitized_name);
1346            g_free(sanitized_name);
1347
1348#ifdef _WIN32
 1349            /* There is no mkstemp() on Windows: have _mktemp() fill in
 1350             * the template, then create the file explicitly.
 1351             */
 1352            fd = open(_mktemp(filename), O_RDWR | O_CREAT | O_EXCL, 0644);
1353#else
1354            fd = mkstemp(filename);
1355#endif
1356            if (fd >= 0) {
1357                unlink(filename);
1358                g_free(filename);
1359                break;
1360            }
1361            g_free(filename);
1362        }
1363        if (errno != EEXIST && errno != EINTR) {
1364            error_setg_errno(errp, errno,
1365                             "can't open backing store %s for guest RAM",
1366                             path);
1367            goto error;
1368        }
1369        /*
1370         * Try again on EINTR and EEXIST.  The latter happens when
1371         * something else creates the file between our two open().
1372         */
1373    }
1374
1375#ifdef _WIN32
1376    SYSTEM_INFO SysInfo;
1377    GetSystemInfo(&SysInfo);
1378    page_size = SysInfo.dwPageSize;
1379#else
1380    page_size = qemu_fd_getpagesize(fd);
1381#endif
1382    block->mr->align = page_size;
1383
1384    if (memory < page_size) {
1385        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1386                   "or larger than page size 0x%" PRIx64,
1387                   memory, page_size);
1388        goto error;
1389    }
1390
1391    memory = ROUND_UP(memory, page_size);
1392
1393    /*
1394     * ftruncate is not supported by hugetlbfs in older
1395     * hosts, so don't bother bailing out on errors.
1396     * If anything goes wrong with it under other filesystems,
1397     * mmap will fail.
1398     */
1399    if (ftruncate(fd, memory)) {
1400        perror("ftruncate");
1401    }
1402
1403#ifdef _WIN32
1404    HANDLE fd_temp = (HANDLE)_get_osfhandle(fd);
1405    HANDLE hMapFile = CreateFileMapping(fd_temp, NULL, PAGE_READWRITE,
1406                                        0, memory, NULL);
1407    area = MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, 0);
1408    if (area == NULL) {
1409#else
1410    area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
1411    if (area == MAP_FAILED) {
1412#endif
1413        error_setg_errno(errp, errno,
1414                         "unable to map backing store for guest RAM");
1415        goto error;
1416    }
1417
1418    if (mem_prealloc) {
1419        os_mem_prealloc(fd, area, memory);
1420    }
1421
1422    block->fd = fd;
1423    return area;
1424
1425error:
1426    if (unlink_on_error) {
1427        unlink(path);
1428    }
1429    if (fd != -1) {
1430        close(fd);
1431    }
1432    return NULL;
1433}
1434
1435/* Called with the ramlist lock held.  */
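/* Best-fit search: return the start of the smallest gap between existing RAM
 * blocks that can still hold @size bytes.
 */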
1436static ram_addr_t find_ram_offset(ram_addr_t size)
1437{
1438    RAMBlock *block, *next_block;
1439    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1440
1441    assert(size != 0); /* it would hand out same offset multiple times */
1442
1443    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1444        return 0;
1445    }
1446
1447    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1448        ram_addr_t end, next = RAM_ADDR_MAX;
1449
1450        end = block->offset + block->max_length;
1451
1452        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1453            if (next_block->offset >= end) {
1454                next = MIN(next, next_block->offset);
1455            }
1456        }
1457        if (next - end >= size && next - end < mingap) {
1458            offset = end;
1459            mingap = next - end;
1460        }
1461    }
1462
1463    if (offset == RAM_ADDR_MAX) {
1464        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1465                (uint64_t)size);
1466        abort();
1467    }
1468
1469    return offset;
1470}
1471
1472ram_addr_t last_ram_offset(void)
1473{
1474    RAMBlock *block;
1475    ram_addr_t last = 0;
1476
1477    rcu_read_lock();
1478    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1479        last = MAX(last, block->offset + block->max_length);
1480    }
1481    rcu_read_unlock();
1482    return last;
1483}
1484
1485static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1486{
1487    int ret;
1488
 1489    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1490    if (!machine_dump_guest_core(current_machine)) {
1491        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1492        if (ret) {
1493            perror("qemu_madvise");
1494            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1495                            "but dump_guest_core=off specified\n");
1496        }
1497    }
1498}
1499
1500/* Called within an RCU critical section, or while the ramlist lock
1501 * is held.
1502 */
1503static RAMBlock *find_ram_block(ram_addr_t addr)
1504{
1505    RAMBlock *block;
1506
1507    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1508        if (block->offset == addr) {
1509            return block;
1510        }
1511    }
1512
1513    return NULL;
1514}
1515
1516const char *qemu_ram_get_idstr(RAMBlock *rb)
1517{
1518    return rb->idstr;
1519}
1520
1521/* Called with iothread lock held.  */
1522void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1523{
1524    RAMBlock *new_block, *block;
1525
1526    rcu_read_lock();
1527    new_block = find_ram_block(addr);
1528    assert(new_block);
1529    assert(!new_block->idstr[0]);
1530
1531    if (dev) {
1532        char *id = qdev_get_dev_path(dev);
1533        if (id) {
1534            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1535            g_free(id);
1536        }
1537    }
1538    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1539
1540    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1541        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1542            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1543                    new_block->idstr);
1544            abort();
1545        }
1546    }
1547    rcu_read_unlock();
1548}
1549
1550/* Called with iothread lock held.  */
1551void qemu_ram_unset_idstr(ram_addr_t addr)
1552{
1553    RAMBlock *block;
1554
1555    /* FIXME: arch_init.c assumes that this is not called throughout
1556     * migration.  Ignore the problem since hot-unplug during migration
1557     * does not work anyway.
1558     */
1559
1560    rcu_read_lock();
1561    block = find_ram_block(addr);
1562    if (block) {
1563        memset(block->idstr, 0, sizeof(block->idstr));
1564    }
1565    rcu_read_unlock();
1566}
1567
1568static int memory_try_enable_merging(void *addr, size_t len)
1569{
1570    if (!machine_mem_merge(current_machine)) {
1571        /* disabled by the user */
1572        return 0;
1573    }
1574
1575    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1576}
1577
 1578/* Only legal before the guest might have detected the memory size: e.g. on
 1579 * incoming migration, or right after reset.
 1580 *
 1581 * As the memory core doesn't know how memory is accessed, it is up to the
 1582 * resize callback to update device state and/or add assertions to detect
1583 * misuse, if necessary.
1584 */
1585int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1586{
1587    RAMBlock *block = find_ram_block(base);
1588
1589    assert(block);
1590
1591    newsize = HOST_PAGE_ALIGN(newsize);
1592
1593    if (block->used_length == newsize) {
1594        return 0;
1595    }
1596
1597    if (!(block->flags & RAM_RESIZEABLE)) {
1598        error_setg_errno(errp, EINVAL,
1599                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1600                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1601                         newsize, block->used_length);
1602        return -EINVAL;
1603    }
1604
1605    if (block->max_length < newsize) {
1606        error_setg_errno(errp, EINVAL,
1607                         "Length too large: %s: 0x" RAM_ADDR_FMT
1608                         " > 0x" RAM_ADDR_FMT, block->idstr,
1609                         newsize, block->max_length);
1610        return -EINVAL;
1611    }
1612
1613    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1614    block->used_length = newsize;
1615    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1616                                        DIRTY_CLIENTS_ALL);
1617    memory_region_set_size(block->mr, newsize);
1618    if (block->resized) {
1619        block->resized(block->idstr, newsize, block->host);
1620    }
1621    return 0;
1622}
1623
1624/* Called with ram_list.mutex held */
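/* Grow the per-client dirty memory bitmaps to cover @new_ram_size pages.
 * Existing bitmap blocks are carried over; only the containing array is
 * reallocated, and the old one is reclaimed through RCU.
 */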
1625static void dirty_memory_extend(ram_addr_t old_ram_size,
1626                                ram_addr_t new_ram_size)
1627{
1628    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1629                                             DIRTY_MEMORY_BLOCK_SIZE);
1630    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1631                                             DIRTY_MEMORY_BLOCK_SIZE);
1632    int i;
1633
1634    /* Only need to extend if block count increased */
1635    if (new_num_blocks <= old_num_blocks) {
1636        return;
1637    }
1638
1639    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1640        DirtyMemoryBlocks *old_blocks;
1641        DirtyMemoryBlocks *new_blocks;
1642        int j;
1643
1644        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1645        new_blocks = g_malloc(sizeof(*new_blocks) +
1646                              sizeof(new_blocks->blocks[0]) * new_num_blocks);
1647
1648        if (old_num_blocks) {
1649            memcpy(new_blocks->blocks, old_blocks->blocks,
1650                   old_num_blocks * sizeof(old_blocks->blocks[0]));
1651        }
1652
1653        for (j = old_num_blocks; j < new_num_blocks; j++) {
1654            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1655        }
1656
1657        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1658
1659        if (old_blocks) {
1660            g_free_rcu(old_blocks, rcu);
1661        }
1662    }
1663}
1664
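/* Allocate the host memory for @new_block (unless it was provided by the
 * caller or Xen manages it), insert the block into the RAM list keeping the
 * list sorted from biggest to smallest, and mark its pages dirty.
 */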
1665static void ram_block_add(RAMBlock *new_block, Error **errp)
1666{
1667    RAMBlock *block;
1668    RAMBlock *last_block = NULL;
1669    ram_addr_t old_ram_size, new_ram_size;
1670    Error *err = NULL;
1671
1672    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1673
1674    qemu_mutex_lock_ramlist();
1675    new_block->offset = find_ram_offset(new_block->max_length);
1676
1677    if (!new_block->host) {
1678        if (xen_enabled()) {
1679            xen_ram_alloc(new_block->offset, new_block->max_length,
1680                          new_block->mr, &err);
1681            if (err) {
1682                error_propagate(errp, err);
1683                qemu_mutex_unlock_ramlist();
1684                return;
1685            }
1686        } else {
1687            new_block->host = phys_mem_alloc(new_block->max_length,
1688                                             &new_block->mr->align);
1689            if (!new_block->host) {
1690                error_setg_errno(errp, errno,
1691                                 "cannot set up guest memory '%s'",
1692                                 memory_region_name(new_block->mr));
1693                qemu_mutex_unlock_ramlist();
1694                return;
1695            }
1696            memory_try_enable_merging(new_block->host, new_block->max_length);
1697        }
1698    }
1699
1700    new_ram_size = MAX(old_ram_size,
1701              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1702    if (new_ram_size > old_ram_size) {
1703        migration_bitmap_extend(old_ram_size, new_ram_size);
1704        dirty_memory_extend(old_ram_size, new_ram_size);
1705    }
1706    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1707     * QLIST (which has an RCU-friendly variant) does not have insertion at
1708     * tail, so save the last element in last_block.
1709     */
1710    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1711        last_block = block;
1712        if (block->max_length < new_block->max_length) {
1713            break;
1714        }
1715    }
1716    if (block) {
1717        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1718    } else if (last_block) {
1719        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1720    } else { /* list is empty */
1721        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1722    }
1723    ram_list.mru_block = NULL;
1724
1725    /* Write list before version */
1726    smp_wmb();
1727    ram_list.version++;
1728    qemu_mutex_unlock_ramlist();
1729
1730    cpu_physical_memory_set_dirty_range(new_block->offset,
1731                                        new_block->used_length,
1732                                        DIRTY_CLIENTS_ALL);
1733
1734    if (new_block->host) {
1735        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1736        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1737        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1738        if (kvm_enabled()) {
1739            kvm_setup_guest_memory(new_block->host, new_block->max_length);
1740        }
1741    }
1742}
1743
1744RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1745                                   bool share, const char *mem_path,
1746                                   Error **errp)
1747{
1748    RAMBlock *new_block;
1749    Error *local_err = NULL;
1750
1751    if (xen_enabled()) {
1752        error_setg(errp, "-mem-path not supported with Xen");
1753        return NULL;
1754    }
1755
1756    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1757        /*
1758         * file_ram_alloc() needs to allocate just like
1759         * phys_mem_alloc, but we haven't bothered to provide
1760         * a hook there.
1761         */
1762        error_setg(errp,
1763                   "-mem-path not supported with this accelerator");
1764        return NULL;
1765    }
1766
1767    size = HOST_PAGE_ALIGN(size);
1768    new_block = g_malloc0(sizeof(*new_block));
1769    new_block->mr = mr;
1770    new_block->used_length = size;
1771    new_block->max_length = size;
1772    new_block->flags = share ? RAM_SHARED : 0;
1773    new_block->flags |= RAM_FILE;
1774    new_block->host = file_ram_alloc(new_block, size,
1775                                     mem_path, errp);
1776    if (!new_block->host) {
1777        g_free(new_block);
1778        return NULL;
1779    }
1780
1781    ram_block_add(new_block, &local_err);
1782    if (local_err) {
1783        g_free(new_block);
1784        error_propagate(errp, local_err);
1785        return NULL;
1786    }
1787    return new_block;
1788}
1789
1790static
1791RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1792                                  void (*resized)(const char*,
1793                                                  uint64_t length,
1794                                                  void *host),
1795                                  void *host, bool resizeable,
1796                                  MemoryRegion *mr, Error **errp)
1797{
1798    RAMBlock *new_block;
1799    Error *local_err = NULL;
1800
1801    size = HOST_PAGE_ALIGN(size);
1802    max_size = HOST_PAGE_ALIGN(max_size);
1803    new_block = g_malloc0(sizeof(*new_block));
1804    new_block->mr = mr;
1805    new_block->resized = resized;
1806    new_block->used_length = size;
1807    new_block->max_length = max_size;
1808    assert(max_size >= size);
1809    new_block->fd = -1;
1810
1811    new_block->host = host;
1812    if (host) {
1813        new_block->flags |= RAM_PREALLOC;
1814    }
1815    if (resizeable) {
1816        new_block->flags |= RAM_RESIZEABLE;
1817    }
1818    ram_block_add(new_block, &local_err);
1819    if (local_err) {
1820        g_free(new_block);
1821        error_propagate(errp, local_err);
1822        return NULL;
1823    }
1824    return new_block;
1825}
1826
1827RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1828                                   MemoryRegion *mr, Error **errp)
1829{
1830    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1831}
1832
1833RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1834{
1835    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1836}
1837
1838RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1839                                     void (*resized)(const char*,
1840                                                     uint64_t length,
1841                                                     void *host),
1842                                     MemoryRegion *mr, Error **errp)
1843{
1844    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1845}
1846
1847static void reclaim_ramblock(RAMBlock *block)
1848{
1849    if (block->flags & RAM_PREALLOC) {
1850        ;
1851    } else if (xen_enabled()) {
1852        xen_invalidate_map_cache_entry(block->host);
1853    } else if (block->fd >= 0) {
1854#ifdef _WIN32
1855        if (block->host) {
1856            UnmapViewOfFile(block->host);
1857        }
1858#else
1859        qemu_ram_munmap(block->host, block->max_length);
1860#endif
1861        close(block->fd);
1862    } else {
1863        qemu_anon_ram_free(block->host, block->max_length);
1864    }
1865    g_free(block);
1866}
1867
1868void qemu_ram_free(RAMBlock *block)
1869{
1870    if (!block) {
1871        return;
1872    }
1873
1874    qemu_mutex_lock_ramlist();
1875    QLIST_REMOVE_RCU(block, next);
1876    ram_list.mru_block = NULL;
1877    /* Write list before version */
1878    smp_wmb();
1879    ram_list.version++;
1880    call_rcu(block, reclaim_ramblock, rcu);
1881    qemu_mutex_unlock_ramlist();
1882}
1883
1884#ifndef _WIN32
1885void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1886{
1887    RAMBlock *block;
1888    ram_addr_t offset;
1889    int flags;
1890    void *area, *vaddr;
1891
1892    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1893        offset = addr - block->offset;
1894        if (offset < block->max_length) {
1895            vaddr = ramblock_ptr(block, offset);
1896            if (block->flags & RAM_PREALLOC) {
1897                ;
1898            } else if (xen_enabled()) {
1899                abort();
1900            } else {
1901                flags = MAP_FIXED;
1902                if (block->fd >= 0) {
1903                    flags |= (block->flags & RAM_SHARED ?
1904                              MAP_SHARED : MAP_PRIVATE);
1905                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1906                                flags, block->fd, offset);
1907                } else {
1908                    /*
1909                     * Remap needs to match alloc.  Accelerators that
1910                     * set phys_mem_alloc never remap.  If they did,
1911                     * we'd need a remap hook here.
1912                     */
1913                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1914
1915                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1916                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1917                                flags, -1, 0);
1918                }
1919                if (area != vaddr) {
1920                    fprintf(stderr, "Could not remap addr: "
1921                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1922                            length, addr);
1923                    exit(1);
1924                }
1925                memory_try_enable_merging(vaddr, length);
1926                qemu_ram_setup_dump(vaddr, length);
1927            }
1928        }
1929    }
1930}
1931#endif /* !_WIN32 */
1932
1933int qemu_get_ram_fd(ram_addr_t addr)
1934{
1935    RAMBlock *block;
1936    int fd;
1937
1938    rcu_read_lock();
1939    block = qemu_get_ram_block(addr);
1940    fd = block->fd;
1941    rcu_read_unlock();
1942    return fd;
1943}
1944
1945void qemu_set_ram_fd(ram_addr_t addr, int fd)
1946{
1947    RAMBlock *block;
1948
1949    rcu_read_lock();
1950    block = qemu_get_ram_block(addr);
1951    block->fd = fd;
1952    rcu_read_unlock();
1953}
1954
1955void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1956{
1957    RAMBlock *block;
1958    void *ptr;
1959
1960    rcu_read_lock();
1961    block = qemu_get_ram_block(addr);
1962    ptr = ramblock_ptr(block, 0);
1963    rcu_read_unlock();
1964    return ptr;
1965}
1966
1967/* Return a host pointer to ram allocated with qemu_ram_alloc.
1968 * This should not be used for general purpose DMA.  Use address_space_map
1969 * or address_space_rw instead. For local memory (e.g. video ram) that the
1970 * device owns, use memory_region_get_ram_ptr.
1971 *
1972 * Called within RCU critical section.
1973 */
1974void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1975{
1976    RAMBlock *block = ram_block;
1977
1978    if (block == NULL) {
1979        block = qemu_get_ram_block(addr);
1980    }
1981
1982    if (xen_enabled() && block->host == NULL) {
1983        /* We need to check if the requested address is in RAM
1984         * because we don't want to map the entire guest memory in QEMU.
1985         * If it is, just map until the end of the page.
1986         */
1987        if (block->offset == 0) {
1988            return xen_map_cache(addr, 0, 0);
1989        }
1990
1991        block->host = xen_map_cache(block->offset, block->max_length, 1);
1992    }
1993    return ramblock_ptr(block, addr - block->offset);
1994}
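
/* Illustrative usage sketch (added for clarity, not part of the original
 * source; "s->vram" and GUEST_ADDR are hypothetical): memory that a device
 * owns can be touched through its MemoryRegion host pointer, while
 * guest-visible DMA should go through the address space API rather than
 * qemu_get_ram_ptr():
 *
 *   uint8_t *vram = memory_region_get_ram_ptr(&s->vram);
 *   vram[0] = 0xff;
 *
 *   uint8_t buf[4];
 *   address_space_rw(&address_space_memory, GUEST_ADDR,
 *                    MEMTXATTRS_UNSPECIFIED, buf, sizeof(buf), false);
 */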
1995
1996/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1997 * but takes a size argument.
1998 *
1999 * Called within RCU critical section.
2000 */
2001static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
2002                                 hwaddr *size)
2003{
2004    RAMBlock *block = ram_block;
2005    ram_addr_t offset_inside_block;
2006    if (*size == 0) {
2007        return NULL;
2008    }
2009
2010    if (block == NULL) {
2011        block = qemu_get_ram_block(addr);
2012    }
2013    offset_inside_block = addr - block->offset;
2014    *size = MIN(*size, block->max_length - offset_inside_block);
2015
2016    if (xen_enabled() && block->host == NULL) {
2017        /* We need to check if the requested address is in RAM
2018         * because we don't want to map the entire guest memory in QEMU.
2019         * If it is, just map the requested area.
2020         */
2021        if (block->offset == 0) {
2022            return xen_map_cache(addr, *size, 1);
2023        }
2024
2025        block->host = xen_map_cache(block->offset, block->max_length, 1);
2026    }
2027
2028    return ramblock_ptr(block, offset_inside_block);
2029}
2030
2031/*
2032 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
2033 * in that RAMBlock.
2034 *
2035 * ptr: Host pointer to look up
2036 * round_offset: If true, round the result offset down to a page boundary
2037 * *ram_addr: set to result ram_addr
2038 * *offset: set to result offset within the RAMBlock
2039 *
2040 * Returns: RAMBlock (or NULL if not found)
2041 *
2042 * By the time this function returns, the returned pointer is not protected
2043 * by RCU anymore.  If the caller is not within an RCU critical section and
2044 * does not hold the iothread lock, it must have other means of protecting the
2045 * pointer, such as a reference to the region that includes the incoming
2046 * ram_addr_t.
2047 */
2048RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
2049                                   ram_addr_t *ram_addr,
2050                                   ram_addr_t *offset)
2051{
2052    RAMBlock *block;
2053    uint8_t *host = ptr;
2054
2055    if (xen_enabled()) {
2056        rcu_read_lock();
2057        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2058        block = qemu_get_ram_block(*ram_addr);
2059        if (block) {
2060            *offset = (host - block->host);
2061        }
2062        rcu_read_unlock();
2063        return block;
2064    }
2065
2066    rcu_read_lock();
2067    block = atomic_rcu_read(&ram_list.mru_block);
2068    if (block && block->host && host - block->host < block->max_length) {
2069        goto found;
2070    }
2071
2072    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2073        /* This case appears when the block is not mapped. */
2074        if (block->host == NULL) {
2075            continue;
2076        }
2077        if (host - block->host < block->max_length) {
2078            goto found;
2079        }
2080    }
2081
2082    rcu_read_unlock();
2083    return NULL;
2084
2085found:
2086    *offset = (host - block->host);
2087    if (round_offset) {
2088        *offset &= TARGET_PAGE_MASK;
2089    }
2090    *ram_addr = block->offset + *offset;
2091    rcu_read_unlock();
2092    return block;
2093}
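
/* Illustrative sketch (added for clarity, not part of the original source;
 * "host_ptr" is a hypothetical pointer into guest RAM): translate a host
 * pointer back to its RAMBlock, ram_addr and offset, assuming the caller
 * protects the block as described above (RCU, iothread lock, or a region
 * reference):
 *
 *   ram_addr_t ram_addr, offset;
 *   RAMBlock *rb = qemu_ram_block_from_host(host_ptr, false,
 *                                           &ram_addr, &offset);
 *   if (rb) {
 *       ... ram_addr and offset now locate host_ptr inside rb ...
 *   }
 */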
2094
2095/*
2096 * Finds the named RAMBlock
2097 *
2098 * name: The name of the RAMBlock to find
2099 *
2100 * Returns: RAMBlock (or NULL if not found)
2101 */
2102RAMBlock *qemu_ram_block_by_name(const char *name)
2103{
2104    RAMBlock *block;
2105
2106    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2107        if (!strcmp(name, block->idstr)) {
2108            return block;
2109        }
2110    }
2111
2112    return NULL;
2113}
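
/* Illustrative sketch (added for clarity, not part of the original source;
 * "pc.ram" is only an example idstr): look a block up by the idstr that
 * migration uses and handle the not-found case:
 *
 *   RAMBlock *rb = qemu_ram_block_by_name("pc.ram");
 *   if (!rb) {
 *       error_report("RAM block not found");
 *   }
 */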
2114
2115/* Some of the softmmu routines need to translate from a host pointer
2116   (typically a TLB entry) back to a ram offset.  */
2117MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2118{
2119    RAMBlock *block;
2120    ram_addr_t offset; /* Not used */
2121
2122    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2123
2124    if (!block) {
2125        return NULL;
2126    }
2127
2128    return block->mr;
2129}
2130
2131/* Called within RCU critical section.  */
2132static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2133                               uint64_t val, unsigned size)
2134{
2135    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2136        tb_invalidate_phys_page_fast(ram_addr, size);
2137    }
2138    switch (size) {
2139    case 1:
2140        stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2141        break;
2142    case 2:
2143        stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2144        break;
2145    case 4:
2146        stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2147        break;
2148    default:
2149        abort();
2150    }
2151    /* Set both VGA and migration bits for simplicity and to remove
2152     * the notdirty callback faster.
2153     */
2154    cpu_physical_memory_set_dirty_range(ram_addr, size,
2155                                        DIRTY_CLIENTS_NOCODE);
2156    /* we remove the notdirty callback only if the code has been
2157       flushed */
2158    if (!cpu_physical_memory_is_clean(ram_addr)) {
2159        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2160    }
2161}
2162
2163static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2164                                 unsigned size, bool is_write)
2165{
2166    return is_write;
2167}
2168
2169static const MemoryRegionOps notdirty_mem_ops = {
2170    .write = notdirty_mem_write,
2171    .valid.accepts = notdirty_mem_accepts,
2172    .endianness = DEVICE_NATIVE_ENDIAN,
2173};
2174
2175/* Generate a debug exception if a watchpoint has been hit.  */
2176static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2177{
2178    CPUState *cpu = current_cpu;
2179    CPUClass *cc = CPU_GET_CLASS(cpu);
2180    CPUArchState *env = cpu->env_ptr;
2181    target_ulong pc, cs_base;
2182    target_ulong vaddr;
2183    CPUWatchpoint *wp;
2184    int cpu_flags;
2185
2186    if (cpu->watchpoint_hit) {
2187        /* We re-entered the check after replacing the TB. Now raise
2188         * the debug interrupt so that it will trigger after the
2189         * current instruction. */
2190        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2191        return;
2192    }
2193    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2194    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2195        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2196            && (wp->flags & flags)) {
2197            if (flags == BP_MEM_READ) {
2198                wp->flags |= BP_WATCHPOINT_HIT_READ;
2199            } else {
2200                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2201            }
2202            wp->hitaddr = vaddr;
2203            wp->hitattrs = attrs;
2204            if (!cpu->watchpoint_hit) {
2205                if (wp->flags & BP_CPU &&
2206                    !cc->debug_check_watchpoint(cpu, wp)) {
2207                    wp->flags &= ~BP_WATCHPOINT_HIT;
2208                    continue;
2209                }
2210                cpu->watchpoint_hit = wp;
2211                tb_check_watchpoint(cpu);
2212                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2213                    cpu->exception_index = EXCP_DEBUG;
2214                    cpu_loop_exit(cpu);
2215                } else {
2216                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2217                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2218                    cpu_resume_from_signal(cpu, NULL);
2219                }
2220            }
2221        } else {
2222            wp->flags &= ~BP_WATCHPOINT_HIT;
2223        }
2224    }
2225}
2226
2227/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2228   so these check for a hit then pass through to the normal out-of-line
2229   phys routines.  */
2230static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2231                                  unsigned size, MemTxAttrs attrs)
2232{
2233    MemTxResult res;
2234    uint64_t data;
2235    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2236    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2237
2238    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2239    switch (size) {
2240    case 1:
2241        data = address_space_ldub(as, addr, attrs, &res);
2242        break;
2243    case 2:
2244        data = address_space_lduw(as, addr, attrs, &res);
2245        break;
2246    case 4:
2247        data = address_space_ldl(as, addr, attrs, &res);
2248        break;
2249    default: abort();
2250    }
2251    *pdata = data;
2252    return res;
2253}
2254
2255static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2256                                   uint64_t val, unsigned size,
2257                                   MemTxAttrs attrs)
2258{
2259    MemTxResult res;
2260    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2261    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2262
2263    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2264    switch (size) {
2265    case 1:
2266        address_space_stb(as, addr, val, attrs, &res);
2267        break;
2268    case 2:
2269        address_space_stw(as, addr, val, attrs, &res);
2270        break;
2271    case 4:
2272        address_space_stl(as, addr, val, attrs, &res);
2273        break;
2274    default: abort();
2275    }
2276    return res;
2277}
2278
2279static const MemoryRegionOps watch_mem_ops = {
2280    .read_with_attrs = watch_mem_read,
2281    .write_with_attrs = watch_mem_write,
2282    .endianness = DEVICE_NATIVE_ENDIAN,
2283};
2284
2285static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2286                                unsigned len, MemTxAttrs attrs)
2287{
2288    subpage_t *subpage = opaque;
2289    uint8_t buf[8];
2290    MemTxResult res;
2291
2292#if defined(DEBUG_SUBPAGE)
2293    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2294           subpage, len, addr);
2295#endif
2296    res = address_space_read(subpage->as, addr + subpage->base,
2297                             attrs, buf, len);
2298    if (res) {
2299        return res;
2300    }
2301    switch (len) {
2302    case 1:
2303        *data = ldub_p(buf);
2304        return MEMTX_OK;
2305    case 2:
2306        *data = lduw_p(buf);
2307        return MEMTX_OK;
2308    case 4:
2309        *data = ldl_p(buf);
2310        return MEMTX_OK;
2311    case 8:
2312        *data = ldq_p(buf);
2313        return MEMTX_OK;
2314    default:
2315        abort();
2316    }
2317}
2318
2319static MemTxResult subpage_write(void *opaque, hwaddr addr,
2320                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2321{
2322    subpage_t *subpage = opaque;
2323    uint8_t buf[8];
2324
2325#if defined(DEBUG_SUBPAGE)
2326    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2327           " value %"PRIx64"\n",
2328           __func__, subpage, len, addr, value);
2329#endif
2330    switch (len) {
2331    case 1:
2332        stb_p(buf, value);
2333        break;
2334    case 2:
2335        stw_p(buf, value);
2336        break;
2337    case 4:
2338        stl_p(buf, value);
2339        break;
2340    case 8:
2341        stq_p(buf, value);
2342        break;
2343    default:
2344        abort();
2345    }
2346    return address_space_write(subpage->as, addr + subpage->base,
2347                               attrs, buf, len);
2348}
2349
2350static bool subpage_accepts(void *opaque, hwaddr addr,
2351                            unsigned len, bool is_write)
2352{
2353    subpage_t *subpage = opaque;
2354#if defined(DEBUG_SUBPAGE)
2355    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2356           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2357#endif
2358
2359    return address_space_access_valid(subpage->as, addr + subpage->base,
2360                                      len, is_write, MEMTXATTRS_UNSPECIFIED);
2361}
2362
2363static bool subpage_accepts_tr(MemoryTransaction *tr)
2364{
2365    MemTxAttrs attr = tr->attr;
2366    subpage_t *subpage = tr->opaque;
2367    hwaddr addr = tr->addr;
2368    unsigned len = tr->size;
2369    bool is_write = tr->rw;
2370
2371#if defined(DEBUG_SUBPAGE)
2372    fprintf(stderr, "%s: subpage %p %c len %u addr " TARGET_FMT_plx
2373            " secure: %d\n",
2374           __func__, subpage, is_write ? 'w' : 'r', len, addr, attr.secure);
2375#endif
2376
2377    return address_space_access_valid(subpage->as, addr + subpage->base,
2378                                      len, is_write, attr);
2379}
2380
2381static const MemoryRegionOps subpage_ops = {
2382    .read_with_attrs = subpage_read,
2383    .write_with_attrs = subpage_write,
2384    .impl.min_access_size = 1,
2385    .impl.max_access_size = 8,
2386    .valid.min_access_size = 1,
2387    .valid.max_access_size = 8,
2388    .valid.accepts = subpage_accepts,
2389    .valid.accepts_tr = subpage_accepts_tr,
2390    .endianness = DEVICE_NATIVE_ENDIAN,
2391};
2392
2393static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2394                             uint16_t section)
2395{
2396    int idx, eidx;
2397
2398    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2399        return -1;
2400    idx = SUBPAGE_IDX(start);
2401    eidx = SUBPAGE_IDX(end);
2402#if defined(DEBUG_SUBPAGE)
2403    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2404           __func__, mmio, start, end, idx, eidx, section);
2405#endif
2406    for (; idx <= eidx; idx++) {
2407        mmio->sub_section[idx] = section;
2408    }
2409
2410    return 0;
2411}
2412
2413static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2414{
2415    subpage_t *mmio;
2416
2417    mmio = g_malloc0(sizeof(subpage_t));
2418
2419    mmio->as = as;
2420    mmio->base = base;
2421    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2422                          NULL, TARGET_PAGE_SIZE);
2423    mmio->iomem.subpage = true;
2424#if defined(DEBUG_SUBPAGE)
2425    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2426           mmio, base, TARGET_PAGE_SIZE);
2427#endif
2428    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2429
2430    return mmio;
2431}
2432
2433static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2434                              MemoryRegion *mr)
2435{
2436    assert(as);
2437    MemoryRegionSection section = {
2438        .address_space = as,
2439        .mr = mr,
2440        .offset_within_address_space = 0,
2441        .offset_within_region = 0,
2442        .size = int128_2_64(),
2443    };
2444
2445    return phys_section_add(map, &section);
2446}
2447
2448MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2449{
2450    int asidx = cpu_asidx_from_attrs(cpu, attrs);
2451    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2452    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2453    MemoryRegionSection *sections = d->map.sections;
2454
2455    return sections[index & ~TARGET_PAGE_MASK].mr;
2456}
2457
2458static void io_mem_init(void)
2459{
2460    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2461    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2462                          NULL, UINT64_MAX);
2463    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2464                          NULL, UINT64_MAX);
2465    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2466                          NULL, UINT64_MAX);
2467}
2468
2469static void mem_begin(MemoryListener *listener)
2470{
2471    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2472    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2473    uint16_t n;
2474
2475    n = dummy_section(&d->map, as, &io_mem_unassigned);
2476    assert(n == PHYS_SECTION_UNASSIGNED);
2477    n = dummy_section(&d->map, as, &io_mem_notdirty);
2478    assert(n == PHYS_SECTION_NOTDIRTY);
2479    n = dummy_section(&d->map, as, &io_mem_rom);
2480    assert(n == PHYS_SECTION_ROM);
2481    n = dummy_section(&d->map, as, &io_mem_watch);
2482    assert(n == PHYS_SECTION_WATCH);
2483
2484    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2485    d->as = as;
2486    as->next_dispatch = d;
2487}
2488
2489static void address_space_dispatch_free(AddressSpaceDispatch *d)
2490{
2491    phys_sections_free(&d->map);
2492    g_free(d);
2493}
2494
2495static void mem_commit(MemoryListener *listener)
2496{
2497    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2498    AddressSpaceDispatch *cur = as->dispatch;
2499    AddressSpaceDispatch *next = as->next_dispatch;
2500
2501    phys_page_compact_all(next, next->map.nodes_nb);
2502
2503    atomic_rcu_set(&as->dispatch, next);
2504    if (cur) {
2505        call_rcu(cur, address_space_dispatch_free, rcu);
2506    }
2507}
2508
2509static void tcg_commit(MemoryListener *listener)
2510{
2511    CPUAddressSpace *cpuas;
2512    AddressSpaceDispatch *d;
2513
2514    /* since each CPU stores ram addresses in its TLB cache, we must
2515       reset the modified entries */
2516    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2517    cpu_reloading_memory_map();
2518    /* The CPU and TLB are protected by the iothread lock.
2519     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2520     * may have split the RCU critical section.
2521     */
2522    d = atomic_rcu_read(&cpuas->as->dispatch);
2523    cpuas->memory_dispatch = d;
2524    tlb_flush(cpuas->cpu, 1);
2525}
2526
2527void address_space_init_dispatch(AddressSpace *as)
2528{
2529    as->dispatch = NULL;
2530    as->dispatch_listener = (MemoryListener) {
2531        .begin = mem_begin,
2532        .commit = mem_commit,
2533        .region_add = mem_add,
2534        .region_nop = mem_add,
2535        .priority = 0,
2536    };
2537    memory_listener_register(&as->dispatch_listener, as);
2538}
2539
2540void address_space_unregister(AddressSpace *as)
2541{
2542    memory_listener_unregister(&as->dispatch_listener);
2543}
2544
2545void address_space_destroy_dispatch(AddressSpace *as)
2546{
2547    AddressSpaceDispatch *d = as->dispatch;
2548
2549    atomic_rcu_set(&as->dispatch, NULL);
2550    if (d) {
2551        call_rcu(d, address_space_dispatch_free, rcu);
2552    }
2553}
2554
2555static void memory_map_init(void)
2556{
2557    system_memory = g_malloc(sizeof(*system_memory));
2558
2559    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2560    address_space_init(&address_space_memory, system_memory, "memory");
2561
2562    system_io = g_malloc(sizeof(*system_io));
2563    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2564                          65536);
2565    address_space_init(&address_space_io, system_io, "I/O");
2566}
2567
2568MemoryRegion *get_system_memory(void)
2569{
2570    return system_memory;
2571}
2572
2573MemoryRegion *get_system_io(void)
2574{
2575    return system_io;
2576}
2577
2578#endif /* !defined(CONFIG_USER_ONLY) */
2579
2580/* physical memory access (slow version, mainly for debug) */
2581#if defined(CONFIG_USER_ONLY)
2582int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2583                        uint8_t *buf, int len, int is_write)
2584{
2585    int l, flags;
2586    target_ulong page;
2587    void * p;
2588
2589    while (len > 0) {
2590        page = addr & TARGET_PAGE_MASK;
2591        l = (page + TARGET_PAGE_SIZE) - addr;
2592        if (l > len)
2593            l = len;
2594        flags = page_get_flags(page);
2595        if (!(flags & PAGE_VALID))
2596            return -1;
2597        if (is_write) {
2598            if (!(flags & PAGE_WRITE))
2599                return -1;
2600            /* XXX: this code should not depend on lock_user */
2601            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2602                return -1;
2603            memcpy(p, buf, l);
2604            unlock_user(p, addr, l);
2605        } else {
2606            if (!(flags & PAGE_READ))
2607                return -1;
2608            /* XXX: this code should not depend on lock_user */
2609            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2610                return -1;
2611            memcpy(buf, p, l);
2612            unlock_user(p, addr, 0);
2613        }
2614        len -= l;
2615        buf += l;
2616        addr += l;
2617    }
2618    return 0;
2619}
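
/* Illustrative sketch (added for clarity, not part of the original source;
 * "cpu" and GUEST_VADDR are hypothetical): this debug accessor works on
 * guest virtual addresses and reports an unmapped page with -1:
 *
 *   uint8_t insn[4];
 *   if (cpu_memory_rw_debug(cpu, GUEST_VADDR, insn, sizeof(insn), 0) < 0) {
 *       ... address not mapped or not readable in the guest ...
 *   }
 */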
2620
2621void cpu_set_mr(Object *obj, Visitor *v, void *opaque,
2622                const char *name, Error **errp)
2623{
2624}
2625
2626#else
2627
2628void cpu_set_mr(Object *obj, Visitor *v, void *opaque,
2629                const char *name, Error **errp)
2630{
2631    CPUState *cpu = CPU(obj);
2632    Error *local_err = NULL;
2633    char *path = NULL;
2634
2635    visit_type_str(v, name, &path, &local_err);
2636
2637    if (!local_err && strcmp(path, "") != 0) {
2638        cpu->memory = MEMORY_REGION(object_resolve_link(obj, name, path,
2639                                &local_err));
2640    }
2641
2642    if (local_err) {
2643        error_propagate(errp, local_err);
2644        return;
2645    }
2646
2647    object_ref(OBJECT(cpu->memory));
2648    cpu->as = address_space_init_shareable(cpu->memory, NULL);
2649}
2650
2651static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2652                                     hwaddr length)
2653{
2654    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2655    /* No early return if dirty_log_mask is or becomes 0, because
2656     * cpu_physical_memory_set_dirty_range will still call
2657     * xen_modified_memory.
2658     */
2659    if (dirty_log_mask) {
2660        dirty_log_mask =
2661            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2662    }
2663    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2664        tb_invalidate_phys_range(addr, addr + length);
2665        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2666    }
2667    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2668}
2669
2670static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2671{
2672    unsigned access_size_max = mr->ops->valid.max_access_size;
2673
2674    /* Regions are assumed to support 1-4 byte accesses unless
2675       otherwise specified.  */
2676    if (access_size_max == 0) {
2677        access_size_max = 4;
2678    }
2679
2680    /* Bound the maximum access by the alignment of the address.  */
2681    if (!mr->ops->impl.unaligned) {
2682        unsigned align_size_max = addr & -addr;
2683        if (align_size_max != 0 && align_size_max < access_size_max) {
2684            access_size_max = align_size_max;
2685        }
2686    }
2687
2688    /* Don't attempt accesses larger than the maximum.  */
2689    if (l > access_size_max) {
2690        l = access_size_max;
2691    }
2692    l = pow2floor(l);
2693
2694    return l;
2695}
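
/* Worked example (added for clarity, not part of the original source):
 * for a region with valid.max_access_size == 4 and impl.unaligned == false,
 * a request of l == 8 at addr == 0x1006 is first capped to 4 by the
 * maximum, then to 2 by the address alignment (0x1006 & -0x1006 == 2),
 * and pow2floor() leaves it at 2.  The caller issues a 2-byte access and
 * loops for the remainder.
 */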
2696
2697static bool prepare_mmio_access(MemoryRegion *mr)
2698{
2699    bool unlocked = !qemu_mutex_iothread_locked();
2700    bool release_lock = false;
2701
2702    if (unlocked && mr->global_locking) {
2703        qemu_mutex_lock_iothread();
2704        unlocked = false;
2705        release_lock = true;
2706    }
2707    if (mr->flush_coalesced_mmio) {
2708        if (unlocked) {
2709            qemu_mutex_lock_iothread();
2710        }
2711        qemu_flush_coalesced_mmio_buffer();
2712        if (unlocked) {
2713            qemu_mutex_unlock_iothread();
2714        }
2715    }
2716
2717    return release_lock;
2718}
2719
2720/* Called within RCU critical section.  */
2721static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2722                                                MemTxAttrs attrs,
2723                                                const uint8_t *buf,
2724                                                int len, hwaddr addr1,
2725                                                hwaddr l, MemoryRegion *mr)
2726{
2727    uint8_t *ptr;
2728    uint64_t val;
2729    MemTxResult result = MEMTX_OK;
2730    bool release_lock = false;
2731
2732    for (;;) {
2733        if (!memory_access_is_direct(mr, true)) {
2734            release_lock |= prepare_mmio_access(mr);
2735            l = memory_access_size(mr, l, addr1);
2736            /* XXX: could force current_cpu to NULL to avoid
2737               potential bugs */
2738            switch (l) {
2739            case 8:
2740                /* 64 bit write access */
2741                val = ldq_p(buf);
2742                result |= memory_region_dispatch_write(mr, addr1, val, 8,
2743                                                       attrs);
2744                break;
2745            case 4:
2746                /* 32 bit write access */
2747                val = ldl_p(buf);
2748                result |= memory_region_dispatch_write(mr, addr1, val, 4,
2749                                                       attrs);
2750                break;
2751            case 2:
2752                /* 16 bit write access */
2753                val = lduw_p(buf);
2754                result |= memory_region_dispatch_write(mr, addr1, val, 2,
2755                                                       attrs);
2756                break;
2757            case 1:
2758                /* 8 bit write access */
2759                val = ldub_p(buf);
2760                result |= memory_region_dispatch_write(mr, addr1, val, 1,
2761                                                       attrs);
2762                break;
2763            default:
2764                if (mr->ops->access) {
2765                    MemoryTransaction tr = {
2766                        .data.p8 = (uint8_t *) buf,
2767                        .rw = true,
2768                        .addr = addr1,
2769                        .size = l,
2770                        .attr = attrs,
2771                        .opaque = mr->opaque,
2772                    };
2773                    mr->ops->access(&tr);
2774                } else {
2775                    abort();
2776                }
2777            }
2778        } else {
2779            addr1 += memory_region_get_ram_addr(mr);
2780            /* RAM case */
2781            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2782            memcpy(ptr, buf, l);
2783            invalidate_and_set_dirty(mr, addr1, l);
2784        }
2785
2786        if (release_lock) {
2787            qemu_mutex_unlock_iothread();
2788            release_lock = false;
2789        }
2790
2791        len -= l;
2792        buf += l;
2793        addr += l;
2794
2795        if (!len) {
2796            break;
2797        }
2798
2799        l = len;
2800        mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
2801    }
2802
2803    return result;
2804}
2805
2806MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2807                                const uint8_t *buf, int len)
2808{
2809    hwaddr l;
2810    hwaddr addr1;
2811    MemoryRegion *mr;
2812    MemTxResult result = MEMTX_OK;
2813
2814    if (len > 0) {
2815        rcu_read_lock();
2816        l = len;
2817        mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
2818        result = address_space_write_continue(as, addr, attrs, buf, len,
2819                                              addr1, l, mr);
2820        rcu_read_unlock();
2821    }
2822
2823    return result;
2824}
2825
2826/* Called within RCU critical section.  */
2827MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2828                                        MemTxAttrs attrs, uint8_t *buf,
2829                                        int len, hwaddr addr1, hwaddr l,
2830                                        MemoryRegion *mr)
2831{
2832    uint8_t *ptr;
2833    uint64_t val;
2834    MemTxResult result = MEMTX_OK;
2835    bool release_lock = false;
2836
2837    for (;;) {
2838        if (!memory_access_is_direct(mr, false)) {
2839            /* I/O case */
2840            release_lock |= prepare_mmio_access(mr);
2841            l = memory_access_size(mr, l, addr1);
2842            switch (l) {
2843            case 8:
2844                /* 64 bit read access */
2845                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2846                                                      attrs);
2847                stq_p(buf, val);
2848                break;
2849            case 4:
2850                /* 32 bit read access */
2851                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2852                                                      attrs);
2853                stl_p(buf, val);
2854                break;
2855            case 2:
2856                /* 16 bit read access */
2857                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2858                                                      attrs);
2859                stw_p(buf, val);
2860                break;
2861            case 1:
2862                /* 8 bit read access */
2863                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2864                                                      attrs);
2865                stb_p(buf, val);
2866                break;
2867            default:
2868                if (mr->ops->access) {
2869                    MemoryTransaction tr = {
2870                        .data.p8 = buf,
2871                        .rw = false,
2872                        .addr = addr1,
2873                        .size = l,
2874                        .attr = attrs,
2875                        .opaque = mr->opaque,
2876                    };
2877                    mr->ops->access(&tr);
2878                } else {
2879                    abort();
2880                }
2881            }
2882        } else {
2883            /* RAM case */
2884            ptr = qemu_get_ram_ptr(mr->ram_block,
2885                                   memory_region_get_ram_addr(mr) + addr1);
2886            memcpy(buf, ptr, l);
2887        }
2888
2889        if (release_lock) {
2890            qemu_mutex_unlock_iothread();
2891            release_lock = false;
2892        }
2893
2894        len -= l;
2895        buf += l;
2896        addr += l;
2897
2898        if (!len) {
2899            break;
2900        }
2901
2902        l = len;
2903        mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
2904    }
2905
2906    return result;
2907}
2908
2909MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2910                                    MemTxAttrs attrs, uint8_t *buf, int len)
2911{
2912    hwaddr l;
2913    hwaddr addr1;
2914    MemoryRegion *mr;
2915    MemTxResult result = MEMTX_OK;
2916
2917    if (len > 0) {
2918        rcu_read_lock();
2919        l = len;
2920        mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
2921        result = address_space_read_continue(as, addr, attrs, buf, len,
2922                                             addr1, l, mr);
2923        rcu_read_unlock();
2924    }
2925
2926    return result;
2927}
2928
2929MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2930                             uint8_t *buf, int len, bool is_write)
2931{
2932    if (is_write) {
2933        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2934    } else {
2935        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2936    }
2937}
2938
2939void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2940                            int len, int is_write)
2941{
2942    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2943                     buf, len, is_write);
2944}
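
/* Illustrative usage (added for clarity, not part of the original source;
 * GUEST_ADDR is hypothetical): the attrs-aware entry points report a
 * MemTxResult, while cpu_physical_memory_rw() is the legacy wrapper over
 * address_space_memory:
 *
 *   uint32_t word;
 *   MemTxResult r = address_space_read(&address_space_memory, GUEST_ADDR,
 *                                      MEMTXATTRS_UNSPECIFIED,
 *                                      (uint8_t *)&word, sizeof(word));
 *   if (r != MEMTX_OK) {
 *       ... the access faulted or hit an unassigned region ...
 *   }
 */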
2945
2946enum write_rom_type {
2947    WRITE_DATA,
2948    FLUSH_CACHE,
2949};
2950
2951static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2952    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2953{
2954    hwaddr l;
2955    uint8_t *ptr;
2956    hwaddr addr1;
2957    MemoryRegion *mr;
2958
2959    rcu_read_lock();
2960    while (len > 0) {
2961        l = len;
2962        mr = address_space_translate(as, addr, &addr1, &l, true);
2963
2964        if (!(memory_region_is_ram(mr) ||
2965              memory_region_is_romd(mr))) {
2966            if (type == WRITE_DATA) {
2967                address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
2968                                 (uint8_t *) buf, len, true);
2969            } else {
2970                l = memory_access_size(mr, l, addr1);
2971            }
2972        } else {
2973            addr1 += memory_region_get_ram_addr(mr);
2974            /* ROM/RAM case */
2975            ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2976            switch (type) {
2977            case WRITE_DATA:
2978                memcpy(ptr, buf, l);
2979                invalidate_and_set_dirty(mr, addr1, l);
2980                break;
2981            case FLUSH_CACHE:
2982                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2983                break;
2984            }
2985        }
2986        len -= l;
2987        buf += l;
2988        addr += l;
2989    }
2990    rcu_read_unlock();
2991}
2992
2993/* used for ROM loading: can write in RAM and ROM */
2994void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2995                                   const uint8_t *buf, int len)
2996{
2997    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2998}
2999
3000void cpu_flush_icache_range(hwaddr start, int len)
3001{
3002    /*
3003     * This function should do the same thing as an icache flush that was
3004     * triggered from within the guest. For TCG we are always cache coherent,
3005     * so there is no need to flush anything. For KVM / Xen we need to flush
3006     * the host's instruction cache at least.
3007     */
3008    if (tcg_enabled()) {
3009        return;
3010    }
3011
3012    cpu_physical_memory_write_rom_internal(&address_space_memory,
3013                                           start, NULL, len, FLUSH_CACHE);
3014}
3015
3016typedef struct {
3017    MemoryRegion *mr;
3018    void *buffer;
3019    hwaddr addr;
3020    hwaddr len;
3021    bool in_use;
3022} BounceBuffer;
3023
3024static BounceBuffer bounce;
3025
3026typedef struct MapClient {
3027    QEMUBH *bh;
3028    QLIST_ENTRY(MapClient) link;
3029} MapClient;
3030
3031QemuMutex map_client_list_lock;
3032static QLIST_HEAD(map_client_list, MapClient) map_client_list
3033    = QLIST_HEAD_INITIALIZER(map_client_list);
3034
3035static void cpu_unregister_map_client_do(MapClient *client)
3036{
3037    QLIST_REMOVE(client, link);
3038    g_free(client);
3039}
3040
3041static void cpu_notify_map_clients_locked(void)
3042{
3043    MapClient *client;
3044
3045    while (!QLIST_EMPTY(&map_client_list)) {
3046        client = QLIST_FIRST(&map_client_list);
3047        qemu_bh_schedule(client->bh);
3048        cpu_unregister_map_client_do(client);
3049    }
3050}
3051
3052void cpu_register_map_client(QEMUBH *bh)
3053{
3054    MapClient *client = g_malloc(sizeof(*client));
3055
3056    qemu_mutex_lock(&map_client_list_lock);
3057    client->bh = bh;
3058    QLIST_INSERT_HEAD(&map_client_list, client, link);
3059    if (!atomic_read(&bounce.in_use)) {
3060        cpu_notify_map_clients_locked();
3061    }
3062    qemu_mutex_unlock(&map_client_list_lock);
3063}
3064
3065void cpu_exec_init_all(void)
3066{
3067    qemu_mutex_init(&ram_list.mutex);
3068    io_mem_init();
3069    memory_map_init();
3070    qemu_mutex_init(&map_client_list_lock);
3071}
3072
3073void cpu_unregister_map_client(QEMUBH *bh)
3074{
3075    MapClient *client;
3076
3077    qemu_mutex_lock(&map_client_list_lock);
3078    QLIST_FOREACH(client, &map_client_list, link) {
3079        if (client->bh == bh) {
3080            cpu_unregister_map_client_do(client);
3081            break;
3082        }
3083    }
3084    qemu_mutex_unlock(&map_client_list_lock);
3085}
3086
3087static void cpu_notify_map_clients(void)
3088{
3089    qemu_mutex_lock(&map_client_list_lock);
3090    cpu_notify_map_clients_locked();
3091    qemu_mutex_unlock(&map_client_list_lock);
3092}
3093
3094bool address_space_access_valid(AddressSpace *as, hwaddr addr,
3095                                int len, bool is_write,
3096                                MemTxAttrs attr)
3097{
3098    MemoryRegion *mr;
3099    hwaddr l, xlat;
3100
3101    rcu_read_lock();
3102    while (len > 0) {
3103        l = len;
3104        mr = address_space_translate(as, addr, &xlat, &l, is_write);
3105        if (!memory_access_is_direct(mr, is_write)) {
3106            l = memory_access_size(mr, l, addr);
3107            if (!memory_region_access_valid(mr, xlat, l, is_write, attr)) {
3108                rcu_read_unlock();
                    return false;
3109            }
3110        }
3111
3112        len -= l;
3113        addr += l;
3114    }
3115    rcu_read_unlock();
3116    return true;
3117}
3118
3119/* Map a physical memory region into a host virtual address.
3120 * May map a subset of the requested range, given by and returned in *plen.
3121 * May return NULL if resources needed to perform the mapping are exhausted.
3122 * Use only for reads OR writes - not for read-modify-write operations.
3123 * Use cpu_register_map_client() to know when retrying the map operation is
3124 * likely to succeed.
3125 */
3126void *address_space_map(AddressSpace *as,
3127                        hwaddr addr,
3128                        hwaddr *plen,
3129                        bool is_write)
3130{
3131    hwaddr len = *plen;
3132    hwaddr done = 0;
3133    hwaddr l, xlat, base;
3134    MemoryRegion *mr, *this_mr;
3135    ram_addr_t raddr;
3136    void *ptr;
3137
3138    if (len == 0) {
3139        return NULL;
3140    }
3141
3142    l = len;
3143    rcu_read_lock();
3144    mr = address_space_translate(as, addr, &xlat, &l, is_write);
3145
3146    if (!memory_access_is_direct(mr, is_write)) {
3147        if (atomic_xchg(&bounce.in_use, true)) {
3148            rcu_read_unlock();
3149            return NULL;
3150        }
3151        /* Avoid unbounded allocations */
3152        l = MIN(l, TARGET_PAGE_SIZE);
3153        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
3154        bounce.addr = addr;
3155        bounce.len = l;
3156
3157        memory_region_ref(mr);
3158        bounce.mr = mr;
3159        if (!is_write) {
3160            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
3161                               bounce.buffer, l);
3162        }
3163
3164        rcu_read_unlock();
3165        *plen = l;
3166        return bounce.buffer;
3167    }
3168
3169    base = xlat;
3170    raddr = memory_region_get_ram_addr(mr);
3171
3172    for (;;) {
3173        len -= l;
3174        addr += l;
3175        done += l;
3176        if (len == 0) {
3177            break;
3178        }
3179
3180        l = len;
3181        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3182        if (this_mr != mr || xlat != base + done) {
3183            break;
3184        }
3185    }
3186
3187    memory_region_ref(mr);
3188    *plen = done;
3189    ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3190    rcu_read_unlock();
3191
3192    return ptr;
3193}
3194
3195/* Unmaps a memory region previously mapped by address_space_map().
3196 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3197 * the amount of memory that was actually read or written by the caller.
3198 */
3199void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3200                         int is_write, hwaddr access_len)
3201{
3202    if (buffer != bounce.buffer) {
3203        MemoryRegion *mr;
3204        ram_addr_t addr1;
3205
3206        mr = qemu_ram_addr_from_host(buffer, &addr1);
3207        assert(mr != NULL);
3208        if (is_write) {
3209            invalidate_and_set_dirty(mr, addr1, access_len);
3210        }
3211        if (xen_enabled()) {
3212            xen_invalidate_map_cache_entry(buffer);
3213        }
3214        memory_region_unref(mr);
3215        return;
3216    }
3217    if (is_write) {
3218        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3219                            bounce.buffer, access_len);
3220    }
3221    qemu_vfree(bounce.buffer);
3222    bounce.buffer = NULL;
3223    memory_region_unref(bounce.mr);
3224    atomic_mb_set(&bounce.in_use, false);
3225    cpu_notify_map_clients();
3226}
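
/* Illustrative DMA mapping sketch (added for clarity, not part of the
 * original source; GUEST_ADDR and the 4 KiB length are hypothetical):
 * map, use, then unmap with the length actually accessed, and fall back
 * to the slow path when the mapping is refused (e.g. the bounce buffer
 * is already in use):
 *
 *   hwaddr len = 4096;
 *   void *p = address_space_map(&address_space_memory, GUEST_ADDR,
 *                               &len, true);
 *   if (p) {
 *       memset(p, 0, len);
 *       address_space_unmap(&address_space_memory, p, len, true, len);
 *   } else {
 *       ... retry later via cpu_register_map_client(), or use
 *       address_space_rw() ...
 *   }
 */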
3227
3228void *cpu_physical_memory_map(hwaddr addr,
3229                              hwaddr *plen,
3230                              int is_write)
3231{
3232    return address_space_map(&address_space_memory, addr, plen, is_write);
3233}
3234
3235void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3236                               int is_write, hwaddr access_len)
3237{
3238    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3239}
3240
3241/* warning: addr must be aligned */
3242static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3243                                                  MemTxAttrs attrs,
3244                                                  MemTxResult *result,
3245                                                  enum device_endian endian)
3246{
3247    uint8_t *ptr;
3248    uint64_t val;
3249    MemoryRegion *mr;
3250    hwaddr l = 4;
3251    hwaddr addr1;
3252    MemTxResult r;
3253    bool release_lock = false;
3254
3255    rcu_read_lock();
3256    mr = address_space_translate_attr(as, addr, &addr1, &l, false, &attrs);
3257    if (l < 4 || !memory_access_is_direct(mr, false)) {
3258        release_lock |= prepare_mmio_access(mr);
3259
3260        /* I/O case */
3261        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3262#if defined(TARGET_WORDS_BIGENDIAN)
3263        if (endian == DEVICE_LITTLE_ENDIAN) {
3264            val = bswap32(val);
3265        }
3266#else
3267        if (endian == DEVICE_BIG_ENDIAN) {
3268            val = bswap32(val);
3269        }
3270#endif
3271    } else {
3272        /* RAM case */
3273        ptr = qemu_get_ram_ptr(mr->ram_block,
3274                               (memory_region_get_ram_addr(mr)
3275                                & TARGET_PAGE_MASK)
3276                               + addr1);
3277        switch (endian) {
3278        case DEVICE_LITTLE_ENDIAN:
3279            val = ldl_le_p(ptr);
3280            break;
3281        case DEVICE_BIG_ENDIAN:
3282            val = ldl_be_p(ptr);
3283            break;
3284        default:
3285            val = ldl_p(ptr);
3286            break;
3287        }
3288        r = MEMTX_OK;
3289    }
3290    if (result) {
3291        *result = r;
3292    }
3293    if (release_lock) {
3294        qemu_mutex_unlock_iothread();
3295    }
3296    rcu_read_unlock();
3297    return val;
3298}
3299
3300uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3301                           MemTxAttrs attrs, MemTxResult *result)
3302{
3303    return address_space_ldl_internal(as, addr, attrs, result,
3304                                      DEVICE_NATIVE_ENDIAN);
3305}
3306
3307uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3308                              MemTxAttrs attrs, MemTxResult *result)
3309{
3310    return address_space_ldl_internal(as, addr, attrs, result,
3311                                      DEVICE_LITTLE_ENDIAN);
3312}
3313
3314uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3315                              MemTxAttrs attrs, MemTxResult *result)
3316{
3317    return address_space_ldl_internal(as, addr, attrs, result,
3318                                      DEVICE_BIG_ENDIAN);
3319}
3320
3321uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3322{
3323    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3324}
3325
3326uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3327{
3328    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3329}
3330
3331uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3332{
3333    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3334}
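
/* Illustrative sketch (added for clarity, not part of the original source;
 * GUEST_ADDR is hypothetical): the _le/_be variants fix the device byte
 * order, ldl_phys() follows the target's native order, and the
 * address_space_* forms additionally report a MemTxResult:
 *
 *   uint32_t le = ldl_le_phys(&address_space_memory, GUEST_ADDR);
 *   MemTxResult r;
 *   uint32_t be = address_space_ldl_be(&address_space_memory, GUEST_ADDR,
 *                                      MEMTXATTRS_UNSPECIFIED, &r);
 */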
3335
3336/* warning: addr must be aligned */
3337static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3338                                                  MemTxAttrs attrs,
3339                                                  MemTxResult *result,
3340                                                  enum device_endian endian)
3341{
3342    uint8_t *ptr;
3343    uint64_t val;
3344    MemoryRegion *mr;
3345    hwaddr l = 8;
3346    hwaddr addr1;
3347    MemTxResult r;
3348    bool release_lock = false;
3349
3350    rcu_read_lock();
3351    mr = address_space_translate_attr(as, addr, &addr1, &l,
3352                                 false, &attrs);
3353    if (l < 8 || !memory_access_is_direct(mr, false)) {
3354        release_lock |= prepare_mmio_access(mr);
3355
3356        /* I/O case */
3357        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3358#if defined(TARGET_WORDS_BIGENDIAN)
3359        if (endian == DEVICE_LITTLE_ENDIAN) {
3360            val = bswap64(val);
3361        }
3362#else
3363        if (endian == DEVICE_BIG_ENDIAN) {
3364            val = bswap64(val);
3365        }
3366#endif
3367    } else {
3368        /* RAM case */
3369        ptr = qemu_get_ram_ptr(mr->ram_block,
3370                               (memory_region_get_ram_addr(mr)
3371                                & TARGET_PAGE_MASK)
3372                               + addr1);
3373        switch (endian) {
3374        case DEVICE_LITTLE_ENDIAN:
3375            val = ldq_le_p(ptr);
3376            break;
3377        case DEVICE_BIG_ENDIAN:
3378            val = ldq_be_p(ptr);
3379            break;
3380        default:
3381            val = ldq_p(ptr);
3382            break;
3383        }
3384        r = MEMTX_OK;
3385    }
3386    if (result) {
3387        *result = r;
3388    }
3389    if (release_lock) {
3390        qemu_mutex_unlock_iothread();
3391    }
3392    rcu_read_unlock();
3393    return val;
3394}
3395
3396uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3397                           MemTxAttrs attrs, MemTxResult *result)
3398{
3399    return address_space_ldq_internal(as, addr, attrs, result,
3400                                      DEVICE_NATIVE_ENDIAN);
3401}
3402
3403uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3404                           MemTxAttrs attrs, MemTxResult *result)
3405{
3406    return address_space_ldq_internal(as, addr, attrs, result,
3407                                      DEVICE_LITTLE_ENDIAN);
3408}
3409
3410uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3411                           MemTxAttrs attrs, MemTxResult *result)
3412{
3413    return address_space_ldq_internal(as, addr, attrs, result,
3414                                      DEVICE_BIG_ENDIAN);
3415}
3416
3417uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3418{
3419    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3420}
3421
3422uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3423{
3424    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3425}
3426
3427uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3428{
3429    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3430}
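
/* Illustrative sketch: the attrs/result variants above let a caller detect a
 * failed bus transaction, which the ldq_phys() convenience wrapper silently
 * ignores.  The guest physical address 0x10000 below is an arbitrary,
 * aligned value chosen only for the example.
 */
static void __attribute__((unused)) example_ldq_checked(void)
{
    MemTxResult res;
    uint64_t val;

    /* 64-bit little-endian load with an explicit transaction result */
    val = address_space_ldq_le(&address_space_memory, 0x10000,
                               MEMTXATTRS_UNSPECIFIED, &res);
    if (res != MEMTX_OK) {
        /* the access hit unassigned memory or the device rejected it */
        val = 0;
    }
    (void)val;
}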
3431
3432/* XXX: optimize */
3433uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3434                            MemTxAttrs attrs, MemTxResult *result)
3435{
3436    uint8_t val;
3437    MemTxResult r;
3438
3439    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3440    if (result) {
3441        *result = r;
3442    }
3443    return val;
3444}
3445
3446uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3447{
3448    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3449}
3450
3451/* warning: addr must be aligned */
3452static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3453                                                   hwaddr addr,
3454                                                   MemTxAttrs attrs,
3455                                                   MemTxResult *result,
3456                                                   enum device_endian endian)
3457{
3458    uint8_t *ptr;
3459    uint64_t val;
3460    MemoryRegion *mr;
3461    hwaddr l = 2;
3462    hwaddr addr1;
3463    MemTxResult r;
3464    bool release_lock = false;
3465
3466    rcu_read_lock();
3467    mr = address_space_translate_attr(as, addr, &addr1, &l,
3468                                 false, &attrs);
3469    if (l < 2 || !memory_access_is_direct(mr, false)) {
3470        release_lock |= prepare_mmio_access(mr);
3471
3472        /* I/O case */
3473        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3474#if defined(TARGET_WORDS_BIGENDIAN)
3475        if (endian == DEVICE_LITTLE_ENDIAN) {
3476            val = bswap16(val);
3477        }
3478#else
3479        if (endian == DEVICE_BIG_ENDIAN) {
3480            val = bswap16(val);
3481        }
3482#endif
3483    } else {
3484        /* RAM case */
3485        ptr = qemu_get_ram_ptr(mr->ram_block,
3486                               (memory_region_get_ram_addr(mr)
3487                                & TARGET_PAGE_MASK)
3488                               + addr1);
3489        switch (endian) {
3490        case DEVICE_LITTLE_ENDIAN:
3491            val = lduw_le_p(ptr);
3492            break;
3493        case DEVICE_BIG_ENDIAN:
3494            val = lduw_be_p(ptr);
3495            break;
3496        default:
3497            val = lduw_p(ptr);
3498            break;
3499        }
3500        r = MEMTX_OK;
3501    }
3502    if (result) {
3503        *result = r;
3504    }
3505    if (release_lock) {
3506        qemu_mutex_unlock_iothread();
3507    }
3508    rcu_read_unlock();
3509    return val;
3510}
3511
3512uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3513                           MemTxAttrs attrs, MemTxResult *result)
3514{
3515    return address_space_lduw_internal(as, addr, attrs, result,
3516                                       DEVICE_NATIVE_ENDIAN);
3517}
3518
3519uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3520                           MemTxAttrs attrs, MemTxResult *result)
3521{
3522    return address_space_lduw_internal(as, addr, attrs, result,
3523                                       DEVICE_LITTLE_ENDIAN);
3524}
3525
3526uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3527                           MemTxAttrs attrs, MemTxResult *result)
3528{
3529    return address_space_lduw_internal(as, addr, attrs, result,
3530                                       DEVICE_BIG_ENDIAN);
3531}
3532
3533uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3534{
3535    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3536}
3537
3538uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3539{
3540    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3541}
3542
3543uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3544{
3545    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3546}
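
/* Illustrative sketch: the _le/_be wrappers above fix the device byte order
 * regardless of TARGET_WORDS_BIGENDIAN, while lduw_phys() follows the
 * target's native order.  The register offset 0x2 and the notion of a
 * big-endian status register are assumptions made for the example.
 */
static uint32_t __attribute__((unused)) example_read_be_status(AddressSpace *as,
                                                               hwaddr base)
{
    /* read a 16-bit big-endian register located at base + 0x2 */
    return lduw_be_phys(as, base + 0x2);
}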
3547
3548/* warning: addr must be aligned. The ram page is not marked as dirty
3549   and the code inside is not invalidated. This is useful if the dirty
3550   bits are used to track modified PTEs. */
3551void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3552                                MemTxAttrs attrs, MemTxResult *result)
3553{
3554    uint8_t *ptr;
3555    MemoryRegion *mr;
3556    hwaddr l = 4;
3557    hwaddr addr1;
3558    MemTxResult r;
3559    uint8_t dirty_log_mask;
3560    bool release_lock = false;
3561
3562    rcu_read_lock();
3563    mr = address_space_translate_attr(as, addr, &addr1, &l,
3564                                 true, &attrs);
3565    if (l < 4 || !memory_access_is_direct(mr, true)) {
3566        release_lock |= prepare_mmio_access(mr);
3567
3568        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3569    } else {
3570        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3571        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3572        stl_p(ptr, val);
3573
3574        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3575        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3576        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3577        r = MEMTX_OK;
3578    }
3579    if (result) {
3580        *result = r;
3581    }
3582    if (release_lock) {
3583        qemu_mutex_unlock_iothread();
3584    }
3585    rcu_read_unlock();
3586}
3587
3588void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3589{
3590    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3591}
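
/* Illustrative sketch: as the warning above explains, the _notdirty store
 * bypasses dirty tracking and TB invalidation, which is what target MMU
 * helpers want when they only flip bookkeeping bits inside a page table
 * entry.  The PTE layout and the "accessed" bit position are invented for
 * the example.
 */
static void __attribute__((unused)) example_set_pte_accessed(AddressSpace *as,
                                                             hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    pte |= 1u << 5;                    /* hypothetical accessed bit */
    stl_phys_notdirty(as, pte_addr, pte);
}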
3592
3593/* warning: addr must be aligned */
3594static inline void address_space_stl_internal(AddressSpace *as,
3595                                              hwaddr addr, uint32_t val,
3596                                              MemTxAttrs attrs,
3597                                              MemTxResult *result,
3598                                              enum device_endian endian)
3599{
3600    uint8_t *ptr;
3601    MemoryRegion *mr;
3602    hwaddr l = 4;
3603    hwaddr addr1;
3604    MemTxResult r;
3605    bool release_lock = false;
3606
3607    rcu_read_lock();
3608    mr = address_space_translate_attr(as, addr, &addr1, &l,
3609                                 true, &attrs);
3610    if (l < 4 || !memory_access_is_direct(mr, true)) {
3611        release_lock |= prepare_mmio_access(mr);
3612
3613#if defined(TARGET_WORDS_BIGENDIAN)
3614        if (endian == DEVICE_LITTLE_ENDIAN) {
3615            val = bswap32(val);
3616        }
3617#else
3618        if (endian == DEVICE_BIG_ENDIAN) {
3619            val = bswap32(val);
3620        }
3621#endif
3622        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3623    } else {
3624        /* RAM case */
3625        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3626        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3627        switch (endian) {
3628        case DEVICE_LITTLE_ENDIAN:
3629            stl_le_p(ptr, val);
3630            break;
3631        case DEVICE_BIG_ENDIAN:
3632            stl_be_p(ptr, val);
3633            break;
3634        default:
3635            stl_p(ptr, val);
3636            break;
3637        }
3638        invalidate_and_set_dirty(mr, addr1, 4);
3639        r = MEMTX_OK;
3640    }
3641    if (result) {
3642        *result = r;
3643    }
3644    if (release_lock) {
3645        qemu_mutex_unlock_iothread();
3646    }
3647    rcu_read_unlock();
3648}
3649
3650void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3651                       MemTxAttrs attrs, MemTxResult *result)
3652{
3653    address_space_stl_internal(as, addr, val, attrs, result,
3654                               DEVICE_NATIVE_ENDIAN);
3655}
3656
3657void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3658                       MemTxAttrs attrs, MemTxResult *result)
3659{
3660    address_space_stl_internal(as, addr, val, attrs, result,
3661                               DEVICE_LITTLE_ENDIAN);
3662}
3663
3664void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3665                       MemTxAttrs attrs, MemTxResult *result)
3666{
3667    address_space_stl_internal(as, addr, val, attrs, result,
3668                               DEVICE_BIG_ENDIAN);
3669}
3670
3671void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3672{
3673    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3674}
3675
3676void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3677{
3678    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3679}
3680
3681void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3682{
3683    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3684}
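
/* Illustrative sketch: a helper that must store a 32-bit field the guest
 * defines as little endian can use stl_le_phys() so the layout does not
 * depend on the target's byte order.  The descriptor layout (status word at
 * offset 4) is an assumption for the example.
 */
static void __attribute__((unused)) example_write_le_status(AddressSpace *as,
                                                            hwaddr desc,
                                                            uint32_t status)
{
    stl_le_phys(as, desc + 4, status);
}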
3685
3686/* XXX: optimize */
3687void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3688                       MemTxAttrs attrs, MemTxResult *result)
3689{
3690    uint8_t v = val;
3691    MemTxResult r;
3692
3693    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3694    if (result) {
3695        *result = r;
3696    }
3697}
3698
3699void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3700{
3701    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3702}
3703
3704/* warning: addr must be aligned */
3705static inline void address_space_stw_internal(AddressSpace *as,
3706                                              hwaddr addr, uint32_t val,
3707                                              MemTxAttrs attrs,
3708                                              MemTxResult *result,
3709                                              enum device_endian endian)
3710{
3711    uint8_t *ptr;
3712    MemoryRegion *mr;
3713    hwaddr l = 2;
3714    hwaddr addr1;
3715    MemTxResult r;
3716    bool release_lock = false;
3717
3718    rcu_read_lock();
3719    mr = address_space_translate_attr(as, addr, &addr1, &l, true, &attrs);
3720    if (l < 2 || !memory_access_is_direct(mr, true)) {
3721        release_lock |= prepare_mmio_access(mr);
3722
3723#if defined(TARGET_WORDS_BIGENDIAN)
3724        if (endian == DEVICE_LITTLE_ENDIAN) {
3725            val = bswap16(val);
3726        }
3727#else
3728        if (endian == DEVICE_BIG_ENDIAN) {
3729            val = bswap16(val);
3730        }
3731#endif
3732        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3733    } else {
3734        /* RAM case */
3735        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3736        ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3737        switch (endian) {
3738        case DEVICE_LITTLE_ENDIAN:
3739            stw_le_p(ptr, val);
3740            break;
3741        case DEVICE_BIG_ENDIAN:
3742            stw_be_p(ptr, val);
3743            break;
3744        default:
3745            stw_p(ptr, val);
3746            break;
3747        }
3748        invalidate_and_set_dirty(mr, addr1, 2);
3749        r = MEMTX_OK;
3750    }
3751    if (result) {
3752        *result = r;
3753    }
3754    if (release_lock) {
3755        qemu_mutex_unlock_iothread();
3756    }
3757    rcu_read_unlock();
3758}
3759
3760void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3761                       MemTxAttrs attrs, MemTxResult *result)
3762{
3763    address_space_stw_internal(as, addr, val, attrs, result,
3764                               DEVICE_NATIVE_ENDIAN);
3765}
3766
3767void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3768                       MemTxAttrs attrs, MemTxResult *result)
3769{
3770    address_space_stw_internal(as, addr, val, attrs, result,
3771                               DEVICE_LITTLE_ENDIAN);
3772}
3773
3774void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3775                       MemTxAttrs attrs, MemTxResult *result)
3776{
3777    address_space_stw_internal(as, addr, val, attrs, result,
3778                               DEVICE_BIG_ENDIAN);
3779}
3780
3781void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3782{
3783    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3784}
3785
3786void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3787{
3788    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3789}
3790
3791void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3792{
3793    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3794}
3795
3796/* XXX: optimize */
3797void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3798                       MemTxAttrs attrs, MemTxResult *result)
3799{
3800    MemTxResult r;
3801    val = tswap64(val);
3802    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3803    if (result) {
3804        *result = r;
3805    }
3806}
3807
3808void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3809                       MemTxAttrs attrs, MemTxResult *result)
3810{
3811    MemTxResult r;
3812    val = cpu_to_le64(val);
3813    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3814    if (result) {
3815        *result = r;
3816    }
3817}

3818void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3819                       MemTxAttrs attrs, MemTxResult *result)
3820{
3821    MemTxResult r;
3822    val = cpu_to_be64(val);
3823    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3824    if (result) {
3825        *result = r;
3826    }
3827}
3828
3829void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3830{
3831    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3832}
3833
3834void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3835{
3836    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3837}
3838
3839void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3840{
3841    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3842}
3843
3844/* virtual memory access for debug (includes writing to ROM) */
3845int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3846                        uint8_t *buf, int len, int is_write)
3847{
3848    int l;
3849    hwaddr phys_addr;
3850    target_ulong page;
3851
3852    while (len > 0) {
3853        int asidx;
3854        MemTxAttrs attrs;
3855
3856        page = addr & TARGET_PAGE_MASK;
3857        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3858        asidx = cpu_asidx_from_attrs(cpu, attrs);
3859        /* if no physical page mapped, return an error */
3860        if (phys_addr == -1)
3861            return -1;
3862        l = (page + TARGET_PAGE_SIZE) - addr;
3863        if (l > len)
3864            l = len;
3865        phys_addr += (addr & ~TARGET_PAGE_MASK);
3866        if (is_write) {
3867            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3868                                          phys_addr, buf, l);
3869        } else {
3870            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3871                             MEMTXATTRS_UNSPECIFIED,
3872                             buf, l, 0);
3873        }
3874        len -= l;
3875        buf += l;
3876        addr += l;
3877    }
3878    return 0;
3879}
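
/* Illustrative sketch: this is the kind of call a gdbstub or monitor command
 * makes.  It reads guest *virtual* memory of a stopped vCPU, letting
 * cpu_memory_rw_debug() above translate each page via
 * cpu_get_phys_page_attrs_debug().  The wrapper itself is hypothetical.
 */
static int __attribute__((unused)) example_debug_peek(CPUState *cpu,
                                                      target_ulong vaddr,
                                                      uint8_t *buf, int len)
{
    /* returns 0 on success, -1 if any page in the range is unmapped */
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0);
}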
3880
3881/*
3882 * Allows code that needs to deal with migration bitmaps etc. to still be
3883 * built target-independent.
3884 */
3885size_t qemu_target_page_bits(void)
3886{
3887    return TARGET_PAGE_BITS;
3888}
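
/* Illustrative sketch: target-independent code (migration bitmap handling,
 * for instance) can derive the page size at run time from the helper above
 * instead of compiling against TARGET_PAGE_SIZE.  The conversion function
 * below is hypothetical.
 */
static size_t __attribute__((unused)) example_pages_to_bytes(size_t pages)
{
    return pages << qemu_target_page_bits();
}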
3889
3890#endif
3891
3892/*
3893 * A helper function for the _utterly broken_ virtio device model to find
3894 * out if it's running on a big-endian machine. Don't do this at home, kids!
3895 */
3896bool target_words_bigendian(void);
3897bool target_words_bigendian(void)
3898{
3899#if defined(TARGET_WORDS_BIGENDIAN)
3900    return true;
3901#else
3902    return false;
3903#endif
3904}
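
/* Illustrative sketch: the virtio core uses the helper above to decide how a
 * legacy (guest-endian) field must be byteswapped.  This stand-in shows the
 * shape of that decision for a 16-bit field; it is not the real virtio
 * accessor.
 */
static uint16_t __attribute__((unused)) example_legacy_virtio_to_cpu16(uint16_t v)
{
    return target_words_bigendian() ? be16_to_cpu(v) : le16_to_cpu(v);
}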
3905
3906#ifndef CONFIG_USER_ONLY
3907bool cpu_physical_memory_is_io(hwaddr phys_addr)
3908{
3909    MemoryRegion *mr;
3910    hwaddr l = 1;
3911    bool res;
3912
3913    rcu_read_lock();
3914    mr = address_space_translate(&address_space_memory,
3915                                 phys_addr, &phys_addr, &l, false);
3916
3917    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3918    rcu_read_unlock();
3919    return res;
3920}
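
/* Illustrative sketch: a caller that wants to take a direct host-pointer
 * fast path only when the guest physical address is really backed by RAM or
 * ROMD can gate it on the helper above; MMIO has to go through the slow
 * dispatch path.  The wrapper is hypothetical.
 */
static bool __attribute__((unused)) example_can_access_directly(hwaddr paddr)
{
    return !cpu_physical_memory_is_io(paddr);
}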
3921
3922int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3923{
3924    RAMBlock *block;
3925    int ret = 0;
3926
3927    rcu_read_lock();
3928    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3929        ret = func(block->idstr, block->host, block->offset,
3930                   block->used_length, opaque);
3931        if (ret) {
3932            break;
3933        }
3934    }
3935    rcu_read_unlock();
3936    return ret;
3937}
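
/* Illustrative sketch: the callback below has the RAMBlockIterFunc shape the
 * iterator above expects; returning non-zero stops the walk early.  Totalling
 * used_length is just an invented use of the data passed to the callback.
 */
static int example_sum_block_cb(const char *idstr, void *host_addr,
                                ram_addr_t offset, ram_addr_t length,
                                void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0;                          /* keep iterating */
}

static uint64_t __attribute__((unused)) example_total_ram_bytes(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_sum_block_cb, &total);
    return total;
}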
3938#endif
3939
3940/* FIXME: rewrite - badly needed */
3941
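/* Update one of the CPU's halt/reset input pins (*change = val) and apply the
 * combined pin state: if any of reset_pin, halt_pin or arch_halt_pin is
 * asserted, the CPU is halted via CPU_INTERRUPT_HALT; once every pin has been
 * released, the halt is cleared and the CPU resumes.  'force' re-applies the
 * state even when the combined value did not change.
 */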
3942void cpu_halt_reset_common(CPUState *cpu, bool *change, bool val, bool force)
3943{
3944    bool new_val;
3945    bool old_val = cpu->reset_pin || cpu->halt_pin || cpu->arch_halt_pin;
3946
3947    if (change) {
3948        *change = val;
3949    }
3950    new_val = cpu->reset_pin || cpu->halt_pin || cpu->arch_halt_pin;
3951
3952    if (new_val) {
3953        cpu_interrupt(cpu, CPU_INTERRUPT_HALT);
3954    }
3955
3956    if (new_val == old_val && !force) {
3957        return;
3958    }
3959
3960    if (!new_val) {
3961        cpu_reset_interrupt(cpu, CPU_INTERRUPT_HALT);
3962        cpu_interrupt(cpu, CPU_INTERRUPT_EXITTB);
3963        cpu->halted = 0;
3964    }
3965}
3966
3967void cpu_reset_gpio(void *opaque, int irq, int level)
3968{
3969    CPUState *cpu = CPU(opaque);
3970
3971    if (level == cpu->reset_pin) {
3972        return;
3973    }
3974    if (level || cpu->reset_pin) {
3975        cpu_reset(cpu);
3976        cpu_halt_reset_common(cpu, &cpu->reset_pin, level, true);
3977    } else {
3978        cpu_halt_reset_common(cpu, &cpu->reset_pin, level, false);
3979    }
3980}
3981
3982void cpu_halt_gpio(void *opaque, int irq, int level)
3983{
3984    CPUState *cpu = CPU(opaque);
3985
3986    cpu_halt_reset_common(cpu, &cpu->halt_pin, level, false);
3987}
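
/* Illustrative sketch: a board or SoC model could wire an outgoing halt
 * request line into the handler above.  qemu_allocate_irq() wraps the handler
 * and the CPU pointer into a qemu_irq that another device can drive; the
 * controller device and its "halt-out" GPIO name are invented for the
 * example.
 */
static void __attribute__((unused)) example_wire_halt_line(CPUState *cpu,
                                                           DeviceState *ctrl)
{
    qemu_irq halt_in = qemu_allocate_irq(cpu_halt_gpio, cpu, 0);

    qdev_connect_gpio_out_named(ctrl, "halt-out", 0, halt_in);
}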
3988