qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "qapi/error.h"
  21#ifndef _WIN32
  22#endif
  23
  24#include "qemu/cutils.h"
  25#include "cpu.h"
  26#include "exec/exec-all.h"
  27#include "tcg.h"
  28#include "hw/qdev-core.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#include "hw/xen/xen.h"
  32#endif
  33#include "sysemu/kvm.h"
  34#include "sysemu/sysemu.h"
  35#include "qemu/timer.h"
  36#include "qemu/config-file.h"
  37#include "qemu/error-report.h"
  38#if defined(CONFIG_USER_ONLY)
  39#include "qemu.h"
  40#else /* !CONFIG_USER_ONLY */
  41#include "hw/hw.h"
  42#include "exec/memory.h"
  43#include "exec/ioport.h"
  44#include "sysemu/dma.h"
  45#include "exec/address-spaces.h"
  46#include "sysemu/xen-mapcache.h"
  47#include "trace.h"
  48#endif
  49#include "exec/cpu-all.h"
  50#include "qemu/rcu_queue.h"
  51#include "qemu/main-loop.h"
  52#include "translate-all.h"
  53#include "sysemu/replay.h"
  54
  55#include "exec/memory-internal.h"
  56#include "exec/ram_addr.h"
  57#include "exec/log.h"
  58
  59#include "migration/vmstate.h"
  60
  61#include "qemu/range.h"
  62#ifndef _WIN32
  63#include "qemu/mmap-alloc.h"
  64#endif
  65
  66//#define DEBUG_SUBPAGE
  67
  68#if !defined(CONFIG_USER_ONLY)
  69/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  70 * are protected by the ramlist lock.
  71 */
  72RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  73
  74static MemoryRegion *system_memory;
  75static MemoryRegion *system_io;
  76
  77AddressSpace address_space_io;
  78AddressSpace address_space_memory;
  79
  80MemoryRegion io_mem_rom, io_mem_notdirty;
  81static MemoryRegion io_mem_unassigned;
  82
  83/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  84#define RAM_PREALLOC   (1 << 0)
  85
  86/* RAM is mmap-ed with MAP_SHARED */
  87#define RAM_SHARED     (1 << 1)
  88
  89/* Only a portion of RAM (used_length) is actually used, and migrated.
  90 * This used_length size can change across reboots.
  91 */
  92#define RAM_RESIZEABLE (1 << 2)
  93
  94#endif
  95
  96#ifdef TARGET_PAGE_BITS_VARY
  97int target_page_bits;
  98bool target_page_bits_decided;
  99#endif
 100
 101struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
 102/* current CPU in the current thread. It is only valid inside
 103   cpu_exec() */
 104__thread CPUState *current_cpu;
 105/* 0 = Do not count executed instructions.
 106   1 = Precise instruction counting.
 107   2 = Adaptive rate instruction counting.  */
 108int use_icount;
 109
 110bool set_preferred_target_page_bits(int bits)
 111{
 112    /* The target page size is the lowest common denominator for all
 113     * the CPUs in the system, so we can only make it smaller, never
 114     * larger. And we can't make it smaller once we've committed to
 115     * a particular size.
 116     */
 117#ifdef TARGET_PAGE_BITS_VARY
 118    assert(bits >= TARGET_PAGE_BITS_MIN);
 119    if (target_page_bits == 0 || target_page_bits > bits) {
 120        if (target_page_bits_decided) {
 121            return false;
 122        }
 123        target_page_bits = bits;
 124    }
 125#endif
 126    return true;
 127}
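/*
 * Usage sketch added for illustration (not part of the original file):
 * a target with a configurable page size would call
 * set_preferred_target_page_bits() from its realize code, before the first
 * translation runs finalize_target_page_bits().  The function name
 * example_cpu_pick_page_size and the 12/16-bit values are hypothetical.
 * The block is compiled out deliberately.
 */
#if 0
static void example_cpu_pick_page_size(bool uses_64k_pages)
{
    /* Prefer 64K pages when the CPU supports them, otherwise use 4K. */
    int wanted_bits = uses_64k_pages ? 16 : 12;

    if (!set_preferred_target_page_bits(wanted_bits)) {
        /* The smaller page size was requested after translation had already
         * committed to a larger one; the request cannot be honoured.
         */
        error_report("cannot shrink target page size to %d bits any more",
                     wanted_bits);
    }
}
#endif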
 128
 129#if !defined(CONFIG_USER_ONLY)
 130
 131static void finalize_target_page_bits(void)
 132{
 133#ifdef TARGET_PAGE_BITS_VARY
 134    if (target_page_bits == 0) {
 135        target_page_bits = TARGET_PAGE_BITS_MIN;
 136    }
 137    target_page_bits_decided = true;
 138#endif
 139}
 140
 141typedef struct PhysPageEntry PhysPageEntry;
 142
 143struct PhysPageEntry {
  144    /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
  145    uint32_t skip : 6;
  146    /* Index into phys_sections (!skip) or phys_map_nodes (skip). */
 147    uint32_t ptr : 26;
 148};
 149
 150#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 151
 152/* Size of the L2 (and L3, etc) page tables.  */
 153#define ADDR_SPACE_BITS 64
 154
 155#define P_L2_BITS 9
 156#define P_L2_SIZE (1 << P_L2_BITS)
 157
 158#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
 159
 160typedef PhysPageEntry Node[P_L2_SIZE];
 161
 162typedef struct PhysPageMap {
 163    struct rcu_head rcu;
 164
 165    unsigned sections_nb;
 166    unsigned sections_nb_alloc;
 167    unsigned nodes_nb;
 168    unsigned nodes_nb_alloc;
 169    Node *nodes;
 170    MemoryRegionSection *sections;
 171} PhysPageMap;
 172
 173struct AddressSpaceDispatch {
 174    struct rcu_head rcu;
 175
 176    MemoryRegionSection *mru_section;
 177    /* This is a multi-level map on the physical address space.
 178     * The bottom level has pointers to MemoryRegionSections.
 179     */
 180    PhysPageEntry phys_map;
 181    PhysPageMap map;
 182    AddressSpace *as;
 183};
 184
 185#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 186typedef struct subpage_t {
 187    MemoryRegion iomem;
 188    AddressSpace *as;
 189    hwaddr base;
 190    uint16_t sub_section[];
 191} subpage_t;
 192
 193#define PHYS_SECTION_UNASSIGNED 0
 194#define PHYS_SECTION_NOTDIRTY 1
 195#define PHYS_SECTION_ROM 2
 196#define PHYS_SECTION_WATCH 3
 197
 198static void io_mem_init(void);
 199static void memory_map_init(void);
 200static void tcg_commit(MemoryListener *listener);
 201
 202static MemoryRegion io_mem_watch;
 203
 204/**
 205 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 206 * @cpu: the CPU whose AddressSpace this is
 207 * @as: the AddressSpace itself
 208 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 209 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 210 */
 211struct CPUAddressSpace {
 212    CPUState *cpu;
 213    AddressSpace *as;
 214    struct AddressSpaceDispatch *memory_dispatch;
 215    MemoryListener tcg_as_listener;
 216};
 217
 218#endif
 219
 220#if !defined(CONFIG_USER_ONLY)
 221
 222static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 223{
 224    static unsigned alloc_hint = 16;
 225    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 226        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
 227        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 228        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 229        alloc_hint = map->nodes_nb_alloc;
 230    }
 231}
 232
 233static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 234{
 235    unsigned i;
 236    uint32_t ret;
 237    PhysPageEntry e;
 238    PhysPageEntry *p;
 239
 240    ret = map->nodes_nb++;
 241    p = map->nodes[ret];
 242    assert(ret != PHYS_MAP_NODE_NIL);
 243    assert(ret != map->nodes_nb_alloc);
 244
 245    e.skip = leaf ? 0 : 1;
 246    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 247    for (i = 0; i < P_L2_SIZE; ++i) {
 248        memcpy(&p[i], &e, sizeof(e));
 249    }
 250    return ret;
 251}
 252
 253static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 254                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 255                                int level)
 256{
 257    PhysPageEntry *p;
 258    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 259
 260    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 261        lp->ptr = phys_map_node_alloc(map, level == 0);
 262    }
 263    p = map->nodes[lp->ptr];
 264    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 265
 266    while (*nb && lp < &p[P_L2_SIZE]) {
 267        if ((*index & (step - 1)) == 0 && *nb >= step) {
 268            lp->skip = 0;
 269            lp->ptr = leaf;
 270            *index += step;
 271            *nb -= step;
 272        } else {
 273            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 274        }
 275        ++lp;
 276    }
 277}
 278
 279static void phys_page_set(AddressSpaceDispatch *d,
 280                          hwaddr index, hwaddr nb,
 281                          uint16_t leaf)
 282{
 283    /* Wildly overreserve - it doesn't matter much. */
 284    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 285
 286    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 287}
 288
  289/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 290 * and update our entry so we can skip it and go directly to the destination.
 291 */
 292static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
 293{
 294    unsigned valid_ptr = P_L2_SIZE;
 295    int valid = 0;
 296    PhysPageEntry *p;
 297    int i;
 298
 299    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 300        return;
 301    }
 302
 303    p = nodes[lp->ptr];
 304    for (i = 0; i < P_L2_SIZE; i++) {
 305        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 306            continue;
 307        }
 308
 309        valid_ptr = i;
 310        valid++;
 311        if (p[i].skip) {
 312            phys_page_compact(&p[i], nodes);
 313        }
 314    }
 315
 316    /* We can only compress if there's only one child. */
 317    if (valid != 1) {
 318        return;
 319    }
 320
 321    assert(valid_ptr < P_L2_SIZE);
 322
 323    /* Don't compress if it won't fit in the # of bits we have. */
 324    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 325        return;
 326    }
 327
 328    lp->ptr = p[valid_ptr].ptr;
 329    if (!p[valid_ptr].skip) {
 330        /* If our only child is a leaf, make this a leaf. */
 331        /* By design, we should have made this node a leaf to begin with so we
 332         * should never reach here.
 333         * But since it's so simple to handle this, let's do it just in case we
 334         * change this rule.
 335         */
 336        lp->skip = 0;
 337    } else {
 338        lp->skip += p[valid_ptr].skip;
 339    }
 340}
 341
 342static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 343{
 344    if (d->phys_map.skip) {
 345        phys_page_compact(&d->phys_map, d->map.nodes);
 346    }
 347}
 348
 349static inline bool section_covers_addr(const MemoryRegionSection *section,
 350                                       hwaddr addr)
 351{
 352    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 353     * the section must cover the entire address space.
 354     */
 355    return int128_gethi(section->size) ||
 356           range_covers_byte(section->offset_within_address_space,
 357                             int128_getlo(section->size), addr);
 358}
 359
 360static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 361                                           Node *nodes, MemoryRegionSection *sections)
 362{
 363    PhysPageEntry *p;
 364    hwaddr index = addr >> TARGET_PAGE_BITS;
 365    int i;
 366
 367    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 368        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 369            return &sections[PHYS_SECTION_UNASSIGNED];
 370        }
 371        p = nodes[lp.ptr];
 372        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 373    }
 374
 375    if (section_covers_addr(&sections[lp.ptr], addr)) {
 376        return &sections[lp.ptr];
 377    } else {
 378        return &sections[PHYS_SECTION_UNASSIGNED];
 379    }
 380}
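/*
 * Worked example added for illustration (not part of the original file):
 * how a physical address decomposes into the per-level indices that
 * phys_page_find() walks above.  With TARGET_PAGE_BITS == 12 and
 * P_L2_BITS == 9, each level consumes 9 bits of the page index; the skip
 * optimisation merely jumps over levels with a single child.  The concrete
 * address is made up and the block is compiled out deliberately.
 */
#if 0
static void example_phys_page_indices(void)
{
    hwaddr addr = 0x123456789000ULL;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int level;

    for (level = P_L2_LEVELS - 1; level >= 0; level--) {
        unsigned slot = (index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
        printf("level %d -> slot %u\n", level, slot);
    }
}
#endif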
 381
 382bool memory_region_is_unassigned(MemoryRegion *mr)
 383{
 384    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 385        && mr != &io_mem_watch;
 386}
 387
 388/* Called from RCU critical section */
 389static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 390                                                        hwaddr addr,
 391                                                        bool resolve_subpage)
 392{
 393    MemoryRegionSection *section = atomic_read(&d->mru_section);
 394    subpage_t *subpage;
 395    bool update;
 396
 397    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
 398        section_covers_addr(section, addr)) {
 399        update = false;
 400    } else {
 401        section = phys_page_find(d->phys_map, addr, d->map.nodes,
 402                                 d->map.sections);
 403        update = true;
 404    }
 405    if (resolve_subpage && section->mr->subpage) {
 406        subpage = container_of(section->mr, subpage_t, iomem);
 407        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 408    }
 409    if (update) {
 410        atomic_set(&d->mru_section, section);
 411    }
 412    return section;
 413}
 414
 415/* Called from RCU critical section */
 416static MemoryRegionSection *
 417address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 418                                 hwaddr *plen, bool resolve_subpage)
 419{
 420    MemoryRegionSection *section;
 421    MemoryRegion *mr;
 422    Int128 diff;
 423
 424    section = address_space_lookup_region(d, addr, resolve_subpage);
 425    /* Compute offset within MemoryRegionSection */
 426    addr -= section->offset_within_address_space;
 427
 428    /* Compute offset within MemoryRegion */
 429    *xlat = addr + section->offset_within_region;
 430
 431    mr = section->mr;
 432
 433    /* MMIO registers can be expected to perform full-width accesses based only
 434     * on their address, without considering adjacent registers that could
 435     * decode to completely different MemoryRegions.  When such registers
 436     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 437     * regions overlap wildly.  For this reason we cannot clamp the accesses
 438     * here.
 439     *
 440     * If the length is small (as is the case for address_space_ldl/stl),
 441     * everything works fine.  If the incoming length is large, however,
 442     * the caller really has to do the clamping through memory_access_size.
 443     */
 444    if (memory_region_is_ram(mr)) {
 445        diff = int128_sub(section->size, int128_make64(addr));
 446        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 447    }
 448    return section;
 449}
 450
 451/* Called from RCU critical section */
 452MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 453                                      hwaddr *xlat, hwaddr *plen,
 454                                      bool is_write)
 455{
 456    IOMMUTLBEntry iotlb;
 457    MemoryRegionSection *section;
 458    MemoryRegion *mr;
 459
 460    for (;;) {
 461        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 462        section = address_space_translate_internal(d, addr, &addr, plen, true);
 463        mr = section->mr;
 464
 465        if (!mr->iommu_ops) {
 466            break;
 467        }
 468
 469        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 470        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 471                | (addr & iotlb.addr_mask));
 472        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 473        if (!(iotlb.perm & (1 << is_write))) {
 474            mr = &io_mem_unassigned;
 475            break;
 476        }
 477
 478        as = iotlb.target_as;
 479    }
 480
 481    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 482        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 483        *plen = MIN(page, *plen);
 484    }
 485
 486    *xlat = addr;
 487    return mr;
 488}
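/*
 * Usage sketch added for illustration (not part of the original file):
 * callers must wrap address_space_translate() in an RCU critical section,
 * because the returned MemoryRegion is only guaranteed to stay valid until
 * rcu_read_unlock().  The address and length are made up and the block is
 * compiled out deliberately.
 */
#if 0
static void example_translate_usage(AddressSpace *as)
{
    hwaddr addr = 0x1000, xlat, len = 4;
    MemoryRegion *mr;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &len, false);
    if (memory_access_is_direct(mr, false)) {
        /* RAM-backed: the data can be read straight from host memory. */
    } else {
        /* MMIO: the access has to be dispatched to the region's callbacks. */
    }
    rcu_read_unlock();
}
#endif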
 489
 490/* Called from RCU critical section */
 491MemoryRegionSection *
 492address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
 493                                  hwaddr *xlat, hwaddr *plen)
 494{
 495    MemoryRegionSection *section;
 496    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 497
 498    section = address_space_translate_internal(d, addr, xlat, plen, false);
 499
 500    assert(!section->mr->iommu_ops);
 501    return section;
 502}
 503#endif
 504
 505#if !defined(CONFIG_USER_ONLY)
 506
 507static int cpu_common_post_load(void *opaque, int version_id)
 508{
 509    CPUState *cpu = opaque;
 510
 511    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 512       version_id is increased. */
 513    cpu->interrupt_request &= ~0x01;
 514    tlb_flush(cpu, 1);
 515
 516    return 0;
 517}
 518
 519static int cpu_common_pre_load(void *opaque)
 520{
 521    CPUState *cpu = opaque;
 522
 523    cpu->exception_index = -1;
 524
 525    return 0;
 526}
 527
 528static bool cpu_common_exception_index_needed(void *opaque)
 529{
 530    CPUState *cpu = opaque;
 531
 532    return tcg_enabled() && cpu->exception_index != -1;
 533}
 534
 535static const VMStateDescription vmstate_cpu_common_exception_index = {
 536    .name = "cpu_common/exception_index",
 537    .version_id = 1,
 538    .minimum_version_id = 1,
 539    .needed = cpu_common_exception_index_needed,
 540    .fields = (VMStateField[]) {
 541        VMSTATE_INT32(exception_index, CPUState),
 542        VMSTATE_END_OF_LIST()
 543    }
 544};
 545
 546static bool cpu_common_crash_occurred_needed(void *opaque)
 547{
 548    CPUState *cpu = opaque;
 549
 550    return cpu->crash_occurred;
 551}
 552
 553static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 554    .name = "cpu_common/crash_occurred",
 555    .version_id = 1,
 556    .minimum_version_id = 1,
 557    .needed = cpu_common_crash_occurred_needed,
 558    .fields = (VMStateField[]) {
 559        VMSTATE_BOOL(crash_occurred, CPUState),
 560        VMSTATE_END_OF_LIST()
 561    }
 562};
 563
 564const VMStateDescription vmstate_cpu_common = {
 565    .name = "cpu_common",
 566    .version_id = 1,
 567    .minimum_version_id = 1,
 568    .pre_load = cpu_common_pre_load,
 569    .post_load = cpu_common_post_load,
 570    .fields = (VMStateField[]) {
 571        VMSTATE_UINT32(halted, CPUState),
 572        VMSTATE_UINT32(interrupt_request, CPUState),
 573        VMSTATE_END_OF_LIST()
 574    },
 575    .subsections = (const VMStateDescription*[]) {
 576        &vmstate_cpu_common_exception_index,
 577        &vmstate_cpu_common_crash_occurred,
 578        NULL
 579    }
 580};
 581
 582#endif
 583
 584CPUState *qemu_get_cpu(int index)
 585{
 586    CPUState *cpu;
 587
 588    CPU_FOREACH(cpu) {
 589        if (cpu->cpu_index == index) {
 590            return cpu;
 591        }
 592    }
 593
 594    return NULL;
 595}
 596
 597#if !defined(CONFIG_USER_ONLY)
 598void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
 599{
 600    CPUAddressSpace *newas;
 601
 602    /* Target code should have set num_ases before calling us */
 603    assert(asidx < cpu->num_ases);
 604
 605    if (asidx == 0) {
 606        /* address space 0 gets the convenience alias */
 607        cpu->as = as;
 608    }
 609
 610    /* KVM cannot currently support multiple address spaces. */
 611    assert(asidx == 0 || !kvm_enabled());
 612
 613    if (!cpu->cpu_ases) {
 614        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
 615    }
 616
 617    newas = &cpu->cpu_ases[asidx];
 618    newas->cpu = cpu;
 619    newas->as = as;
 620    if (tcg_enabled()) {
 621        newas->tcg_as_listener.commit = tcg_commit;
 622        memory_listener_register(&newas->tcg_as_listener, as);
 623    }
 624}
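/*
 * Usage sketch added for illustration (not part of the original file):
 * target realize code sets cpu->num_ases first and then registers one
 * AddressSpace per index.  The two-address-space layout and the "secure_as"
 * parameter are hypothetical; the block is compiled out deliberately.
 */
#if 0
static void example_attach_address_spaces(CPUState *cs, AddressSpace *secure_as)
{
    cs->num_ases = 2;
    cpu_address_space_init(cs, &address_space_memory, 0);
    cpu_address_space_init(cs, secure_as, 1);
}
#endif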
 625
 626AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
 627{
 628    /* Return the AddressSpace corresponding to the specified index */
 629    return cpu->cpu_ases[asidx].as;
 630}
 631#endif
 632
 633void cpu_exec_unrealizefn(CPUState *cpu)
 634{
 635    CPUClass *cc = CPU_GET_CLASS(cpu);
 636
 637    cpu_list_remove(cpu);
 638
 639    if (cc->vmsd != NULL) {
 640        vmstate_unregister(NULL, cc->vmsd, cpu);
 641    }
 642    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 643        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
 644    }
 645}
 646
 647void cpu_exec_initfn(CPUState *cpu)
 648{
 649    cpu->as = NULL;
 650    cpu->num_ases = 0;
 651
 652#ifndef CONFIG_USER_ONLY
 653    cpu->thread_id = qemu_get_thread_id();
 654
 655    /* This is a softmmu CPU object, so create a property for it
 656     * so users can wire up its memory. (This can't go in qom/cpu.c
 657     * because that file is compiled only once for both user-mode
 658     * and system builds.) The default if no link is set up is to use
 659     * the system address space.
 660     */
 661    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
 662                             (Object **)&cpu->memory,
 663                             qdev_prop_allow_set_link_before_realize,
 664                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
 665                             &error_abort);
 666    cpu->memory = system_memory;
 667    object_ref(OBJECT(cpu->memory));
 668#endif
 669}
 670
 671void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 672{
 673    CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
 674
 675    cpu_list_add(cpu);
 676
 677#ifndef CONFIG_USER_ONLY
 678    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 679        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
 680    }
 681    if (cc->vmsd != NULL) {
 682        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
 683    }
 684#endif
 685}
 686
 687static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 688{
  689    /* Flush the whole TB cache, as this will not have race conditions
 690     * even if we don't have proper locking yet.
 691     * Ideally we would just invalidate the TBs for the
 692     * specified PC.
 693     */
 694    tb_flush(cpu);
 695}
 696
 697#if defined(CONFIG_USER_ONLY)
 698void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 699
 700{
 701}
 702
 703int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 704                          int flags)
 705{
 706    return -ENOSYS;
 707}
 708
 709void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 710{
 711}
 712
 713int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 714                          int flags, CPUWatchpoint **watchpoint)
 715{
 716    return -ENOSYS;
 717}
 718#else
 719/* Add a watchpoint.  */
 720int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 721                          int flags, CPUWatchpoint **watchpoint)
 722{
 723    CPUWatchpoint *wp;
 724
 725    /* forbid ranges which are empty or run off the end of the address space */
 726    if (len == 0 || (addr + len - 1) < addr) {
 727        error_report("tried to set invalid watchpoint at %"
 728                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 729        return -EINVAL;
 730    }
 731    wp = g_malloc(sizeof(*wp));
 732
 733    wp->vaddr = addr;
 734    wp->len = len;
 735    wp->flags = flags;
 736
 737    /* keep all GDB-injected watchpoints in front */
 738    if (flags & BP_GDB) {
 739        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 740    } else {
 741        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 742    }
 743
 744    tlb_flush_page(cpu, addr);
 745
 746    if (watchpoint)
 747        *watchpoint = wp;
 748    return 0;
 749}
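/*
 * Usage sketch added for illustration (not part of the original file):
 * set a 4-byte write watchpoint and remove it again by reference.  The
 * guest address is made up; BP_MEM_WRITE and BP_GDB are the existing
 * breakpoint flags.  The block is compiled out deliberately.
 */
#if 0
static void example_watchpoint_usage(CPUState *cpu)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(cpu, 0x4000, 4, BP_MEM_WRITE | BP_GDB, &wp) == 0) {
        /* ... let the guest run; a write to the range raises EXCP_DEBUG ... */
        cpu_watchpoint_remove_by_ref(cpu, wp);
    }
}
#endif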
 750
 751/* Remove a specific watchpoint.  */
 752int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 753                          int flags)
 754{
 755    CPUWatchpoint *wp;
 756
 757    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 758        if (addr == wp->vaddr && len == wp->len
 759                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 760            cpu_watchpoint_remove_by_ref(cpu, wp);
 761            return 0;
 762        }
 763    }
 764    return -ENOENT;
 765}
 766
 767/* Remove a specific watchpoint by reference.  */
 768void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 769{
 770    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 771
 772    tlb_flush_page(cpu, watchpoint->vaddr);
 773
 774    g_free(watchpoint);
 775}
 776
 777/* Remove all matching watchpoints.  */
 778void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 779{
 780    CPUWatchpoint *wp, *next;
 781
 782    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 783        if (wp->flags & mask) {
 784            cpu_watchpoint_remove_by_ref(cpu, wp);
 785        }
 786    }
 787}
 788
 789/* Return true if this watchpoint address matches the specified
  790 * access (i.e. the address range covered by the watchpoint overlaps
 791 * partially or completely with the address range covered by the
 792 * access).
 793 */
 794static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 795                                                  vaddr addr,
 796                                                  vaddr len)
 797{
 798    /* We know the lengths are non-zero, but a little caution is
 799     * required to avoid errors in the case where the range ends
 800     * exactly at the top of the address space and so addr + len
 801     * wraps round to zero.
 802     */
 803    vaddr wpend = wp->vaddr + wp->len - 1;
 804    vaddr addrend = addr + len - 1;
 805
 806    return !(addr > wpend || wp->vaddr > addrend);
 807}
 808
 809#endif
 810
 811/* Add a breakpoint.  */
 812int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 813                          CPUBreakpoint **breakpoint)
 814{
 815    CPUBreakpoint *bp;
 816
 817    bp = g_malloc(sizeof(*bp));
 818
 819    bp->pc = pc;
 820    bp->flags = flags;
 821
 822    /* keep all GDB-injected breakpoints in front */
 823    if (flags & BP_GDB) {
 824        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 825    } else {
 826        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 827    }
 828
 829    breakpoint_invalidate(cpu, pc);
 830
 831    if (breakpoint) {
 832        *breakpoint = bp;
 833    }
 834    return 0;
 835}
 836
 837/* Remove a specific breakpoint.  */
 838int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 839{
 840    CPUBreakpoint *bp;
 841
 842    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 843        if (bp->pc == pc && bp->flags == flags) {
 844            cpu_breakpoint_remove_by_ref(cpu, bp);
 845            return 0;
 846        }
 847    }
 848    return -ENOENT;
 849}
 850
 851/* Remove a specific breakpoint by reference.  */
 852void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 853{
 854    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 855
 856    breakpoint_invalidate(cpu, breakpoint->pc);
 857
 858    g_free(breakpoint);
 859}
 860
 861/* Remove all matching breakpoints. */
 862void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 863{
 864    CPUBreakpoint *bp, *next;
 865
 866    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 867        if (bp->flags & mask) {
 868            cpu_breakpoint_remove_by_ref(cpu, bp);
 869        }
 870    }
 871}
 872
  873/* Enable or disable single-step mode. EXCP_DEBUG is returned by the
  874   CPU loop after each instruction. */
 875void cpu_single_step(CPUState *cpu, int enabled)
 876{
 877    if (cpu->singlestep_enabled != enabled) {
 878        cpu->singlestep_enabled = enabled;
 879        if (kvm_enabled()) {
 880            kvm_update_guest_debug(cpu, 0);
 881        } else {
 882            /* must flush all the translated code to avoid inconsistencies */
 883            /* XXX: only flush what is necessary */
 884            tb_flush(cpu);
 885        }
 886    }
 887}
 888
 889void cpu_abort(CPUState *cpu, const char *fmt, ...)
 890{
 891    va_list ap;
 892    va_list ap2;
 893
 894    va_start(ap, fmt);
 895    va_copy(ap2, ap);
 896    fprintf(stderr, "qemu: fatal: ");
 897    vfprintf(stderr, fmt, ap);
 898    fprintf(stderr, "\n");
 899    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 900    if (qemu_log_separate()) {
 901        qemu_log_lock();
 902        qemu_log("qemu: fatal: ");
 903        qemu_log_vprintf(fmt, ap2);
 904        qemu_log("\n");
 905        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 906        qemu_log_flush();
 907        qemu_log_unlock();
 908        qemu_log_close();
 909    }
 910    va_end(ap2);
 911    va_end(ap);
 912    replay_finish();
 913#if defined(CONFIG_USER_ONLY)
 914    {
 915        struct sigaction act;
 916        sigfillset(&act.sa_mask);
 917        act.sa_handler = SIG_DFL;
 918        sigaction(SIGABRT, &act, NULL);
 919    }
 920#endif
 921    abort();
 922}
 923
 924#if !defined(CONFIG_USER_ONLY)
 925/* Called from RCU critical section */
 926static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 927{
 928    RAMBlock *block;
 929
 930    block = atomic_rcu_read(&ram_list.mru_block);
 931    if (block && addr - block->offset < block->max_length) {
 932        return block;
 933    }
 934    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 935        if (addr - block->offset < block->max_length) {
 936            goto found;
 937        }
 938    }
 939
 940    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 941    abort();
 942
 943found:
 944    /* It is safe to write mru_block outside the iothread lock.  This
 945     * is what happens:
 946     *
 947     *     mru_block = xxx
 948     *     rcu_read_unlock()
 949     *                                        xxx removed from list
 950     *                  rcu_read_lock()
 951     *                  read mru_block
 952     *                                        mru_block = NULL;
 953     *                                        call_rcu(reclaim_ramblock, xxx);
 954     *                  rcu_read_unlock()
 955     *
 956     * atomic_rcu_set is not needed here.  The block was already published
 957     * when it was placed into the list.  Here we're just making an extra
 958     * copy of the pointer.
 959     */
 960    ram_list.mru_block = block;
 961    return block;
 962}
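/*
 * Usage sketch added for illustration (not part of the original file):
 * qemu_get_ram_block() must be called from an RCU critical section, since
 * a concurrent hot-unplug may reclaim the block once the reader leaves it.
 * The block is compiled out deliberately.
 */
#if 0
static ram_addr_t example_ram_block_lookup(ram_addr_t addr)
{
    RAMBlock *block;
    ram_addr_t max_length;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    max_length = block->max_length;
    rcu_read_unlock();

    return max_length;
}
#endif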
 963
 964static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 965{
 966    CPUState *cpu;
 967    ram_addr_t start1;
 968    RAMBlock *block;
 969    ram_addr_t end;
 970
 971    end = TARGET_PAGE_ALIGN(start + length);
 972    start &= TARGET_PAGE_MASK;
 973
 974    rcu_read_lock();
 975    block = qemu_get_ram_block(start);
 976    assert(block == qemu_get_ram_block(end - 1));
 977    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 978    CPU_FOREACH(cpu) {
 979        tlb_reset_dirty(cpu, start1, length);
 980    }
 981    rcu_read_unlock();
 982}
 983
 984/* Note: start and end must be within the same ram block.  */
 985bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 986                                              ram_addr_t length,
 987                                              unsigned client)
 988{
 989    DirtyMemoryBlocks *blocks;
 990    unsigned long end, page;
 991    bool dirty = false;
 992
 993    if (length == 0) {
 994        return false;
 995    }
 996
 997    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
 998    page = start >> TARGET_PAGE_BITS;
 999
1000    rcu_read_lock();
1001
1002    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1003
1004    while (page < end) {
1005        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1006        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1007        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1008
1009        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1010                                              offset, num);
1011        page += num;
1012    }
1013
1014    rcu_read_unlock();
1015
1016    if (dirty && tcg_enabled()) {
1017        tlb_reset_dirty_range_all(start, length);
1018    }
1019
1020    return dirty;
1021}
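/*
 * Worked example added for illustration (not part of the original file):
 * how a page number maps onto the two-level dirty bitmap walked above.
 * Page N lives in block N / DIRTY_MEMORY_BLOCK_SIZE at bit
 * N % DIRTY_MEMORY_BLOCK_SIZE.  The block is compiled out deliberately.
 */
#if 0
static void example_dirty_bitmap_indices(ram_addr_t addr)
{
    unsigned long page = addr >> TARGET_PAGE_BITS;
    unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    printf("page %lu -> block %lu, bit %lu\n", page, idx, offset);
}
#endif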
1022
1023/* Called from RCU critical section */
1024hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1025                                       MemoryRegionSection *section,
1026                                       target_ulong vaddr,
1027                                       hwaddr paddr, hwaddr xlat,
1028                                       int prot,
1029                                       target_ulong *address)
1030{
1031    hwaddr iotlb;
1032    CPUWatchpoint *wp;
1033
1034    if (memory_region_is_ram(section->mr)) {
1035        /* Normal RAM.  */
1036        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1037        if (!section->readonly) {
1038            iotlb |= PHYS_SECTION_NOTDIRTY;
1039        } else {
1040            iotlb |= PHYS_SECTION_ROM;
1041        }
1042    } else {
1043        AddressSpaceDispatch *d;
1044
1045        d = atomic_rcu_read(&section->address_space->dispatch);
1046        iotlb = section - d->map.sections;
1047        iotlb += xlat;
1048    }
1049
1050    /* Make accesses to pages with watchpoints go via the
1051       watchpoint trap routines.  */
1052    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1053        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1054            /* Avoid trapping reads of pages with a write breakpoint. */
1055            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1056                iotlb = PHYS_SECTION_WATCH + paddr;
1057                *address |= TLB_MMIO;
1058                break;
1059            }
1060        }
1061    }
1062
1063    return iotlb;
1064}
1065#endif /* defined(CONFIG_USER_ONLY) */
1066
1067#if !defined(CONFIG_USER_ONLY)
1068
1069static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1070                             uint16_t section);
1071static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1072
1073static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1074                               qemu_anon_ram_alloc;
1075
1076/*
 1077 * Set a custom physical guest memory allocator.
1078 * Accelerators with unusual needs may need this.  Hopefully, we can
1079 * get rid of it eventually.
1080 */
1081void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1082{
1083    phys_mem_alloc = alloc;
1084}
1085
1086static uint16_t phys_section_add(PhysPageMap *map,
1087                                 MemoryRegionSection *section)
1088{
1089    /* The physical section number is ORed with a page-aligned
1090     * pointer to produce the iotlb entries.  Thus it should
1091     * never overflow into the page-aligned value.
1092     */
1093    assert(map->sections_nb < TARGET_PAGE_SIZE);
1094
1095    if (map->sections_nb == map->sections_nb_alloc) {
1096        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1097        map->sections = g_renew(MemoryRegionSection, map->sections,
1098                                map->sections_nb_alloc);
1099    }
1100    map->sections[map->sections_nb] = *section;
1101    memory_region_ref(section->mr);
1102    return map->sections_nb++;
1103}
1104
1105static void phys_section_destroy(MemoryRegion *mr)
1106{
1107    bool have_sub_page = mr->subpage;
1108
1109    memory_region_unref(mr);
1110
1111    if (have_sub_page) {
1112        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1113        object_unref(OBJECT(&subpage->iomem));
1114        g_free(subpage);
1115    }
1116}
1117
1118static void phys_sections_free(PhysPageMap *map)
1119{
1120    while (map->sections_nb > 0) {
1121        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1122        phys_section_destroy(section->mr);
1123    }
1124    g_free(map->sections);
1125    g_free(map->nodes);
1126}
1127
1128static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1129{
1130    subpage_t *subpage;
1131    hwaddr base = section->offset_within_address_space
1132        & TARGET_PAGE_MASK;
1133    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1134                                                   d->map.nodes, d->map.sections);
1135    MemoryRegionSection subsection = {
1136        .offset_within_address_space = base,
1137        .size = int128_make64(TARGET_PAGE_SIZE),
1138    };
1139    hwaddr start, end;
1140
1141    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1142
1143    if (!(existing->mr->subpage)) {
1144        subpage = subpage_init(d->as, base);
1145        subsection.address_space = d->as;
1146        subsection.mr = &subpage->iomem;
1147        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1148                      phys_section_add(&d->map, &subsection));
1149    } else {
1150        subpage = container_of(existing->mr, subpage_t, iomem);
1151    }
1152    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1153    end = start + int128_get64(section->size) - 1;
1154    subpage_register(subpage, start, end,
1155                     phys_section_add(&d->map, section));
1156}
1157
1158
1159static void register_multipage(AddressSpaceDispatch *d,
1160                               MemoryRegionSection *section)
1161{
1162    hwaddr start_addr = section->offset_within_address_space;
1163    uint16_t section_index = phys_section_add(&d->map, section);
1164    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1165                                                    TARGET_PAGE_BITS));
1166
1167    assert(num_pages);
1168    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1169}
1170
1171static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1172{
1173    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1174    AddressSpaceDispatch *d = as->next_dispatch;
1175    MemoryRegionSection now = *section, remain = *section;
1176    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1177
1178    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1179        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1180                       - now.offset_within_address_space;
1181
1182        now.size = int128_min(int128_make64(left), now.size);
1183        register_subpage(d, &now);
1184    } else {
1185        now.size = int128_zero();
1186    }
1187    while (int128_ne(remain.size, now.size)) {
1188        remain.size = int128_sub(remain.size, now.size);
1189        remain.offset_within_address_space += int128_get64(now.size);
1190        remain.offset_within_region += int128_get64(now.size);
1191        now = remain;
1192        if (int128_lt(remain.size, page_size)) {
1193            register_subpage(d, &now);
1194        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1195            now.size = page_size;
1196            register_subpage(d, &now);
1197        } else {
1198            now.size = int128_and(now.size, int128_neg(page_size));
1199            register_multipage(d, &now);
1200        }
1201    }
1202}
1203
1204void qemu_flush_coalesced_mmio_buffer(void)
1205{
1206    if (kvm_enabled())
1207        kvm_flush_coalesced_mmio_buffer();
1208}
1209
1210void qemu_mutex_lock_ramlist(void)
1211{
1212    qemu_mutex_lock(&ram_list.mutex);
1213}
1214
1215void qemu_mutex_unlock_ramlist(void)
1216{
1217    qemu_mutex_unlock(&ram_list.mutex);
1218}
1219
1220#ifdef __linux__
1221static int64_t get_file_size(int fd)
1222{
1223    int64_t size = lseek(fd, 0, SEEK_END);
1224    if (size < 0) {
1225        return -errno;
1226    }
1227    return size;
1228}
1229
1230static void *file_ram_alloc(RAMBlock *block,
1231                            ram_addr_t memory,
1232                            const char *path,
1233                            Error **errp)
1234{
1235    bool unlink_on_error = false;
1236    char *filename;
1237    char *sanitized_name;
1238    char *c;
1239    void *area = MAP_FAILED;
1240    int fd = -1;
1241    int64_t file_size;
1242
1243    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1244        error_setg(errp,
1245                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1246        return NULL;
1247    }
1248
1249    for (;;) {
1250        fd = open(path, O_RDWR);
1251        if (fd >= 0) {
1252            /* @path names an existing file, use it */
1253            break;
1254        }
1255        if (errno == ENOENT) {
1256            /* @path names a file that doesn't exist, create it */
1257            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1258            if (fd >= 0) {
1259                unlink_on_error = true;
1260                break;
1261            }
1262        } else if (errno == EISDIR) {
1263            /* @path names a directory, create a file there */
1264            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1265            sanitized_name = g_strdup(memory_region_name(block->mr));
1266            for (c = sanitized_name; *c != '\0'; c++) {
1267                if (*c == '/') {
1268                    *c = '_';
1269                }
1270            }
1271
1272            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1273                                       sanitized_name);
1274            g_free(sanitized_name);
1275
1276            fd = mkstemp(filename);
1277            if (fd >= 0) {
1278                unlink(filename);
1279                g_free(filename);
1280                break;
1281            }
1282            g_free(filename);
1283        }
1284        if (errno != EEXIST && errno != EINTR) {
1285            error_setg_errno(errp, errno,
1286                             "can't open backing store %s for guest RAM",
1287                             path);
1288            goto error;
1289        }
1290        /*
1291         * Try again on EINTR and EEXIST.  The latter happens when
1292         * something else creates the file between our two open().
1293         */
1294    }
1295
1296    block->page_size = qemu_fd_getpagesize(fd);
1297    block->mr->align = block->page_size;
1298#if defined(__s390x__)
1299    if (kvm_enabled()) {
1300        block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1301    }
1302#endif
1303
1304    file_size = get_file_size(fd);
1305
1306    if (memory < block->page_size) {
1307        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1308                   "or larger than page size 0x%zx",
1309                   memory, block->page_size);
1310        goto error;
1311    }
1312
1313    if (file_size > 0 && file_size < memory) {
1314        error_setg(errp, "backing store %s size 0x%" PRIx64
1315                   " does not match 'size' option 0x" RAM_ADDR_FMT,
1316                   path, file_size, memory);
1317        goto error;
1318    }
1319
1320    memory = ROUND_UP(memory, block->page_size);
1321
1322    /*
1323     * ftruncate is not supported by hugetlbfs in older
1324     * hosts, so don't bother bailing out on errors.
1325     * If anything goes wrong with it under other filesystems,
1326     * mmap will fail.
1327     *
1328     * Do not truncate the non-empty backend file to avoid corrupting
1329     * the existing data in the file. Disabling shrinking is not
1330     * enough. For example, the current vNVDIMM implementation stores
1331     * the guest NVDIMM labels at the end of the backend file. If the
1332     * backend file is later extended, QEMU will not be able to find
1333     * those labels. Therefore, extending the non-empty backend file
1334     * is disabled as well.
1335     */
1336    if (!file_size && ftruncate(fd, memory)) {
1337        perror("ftruncate");
1338    }
1339
1340    area = qemu_ram_mmap(fd, memory, block->mr->align,
1341                         block->flags & RAM_SHARED);
1342    if (area == MAP_FAILED) {
1343        error_setg_errno(errp, errno,
1344                         "unable to map backing store for guest RAM");
1345        goto error;
1346    }
1347
1348    if (mem_prealloc) {
1349        os_mem_prealloc(fd, area, memory, errp);
1350        if (errp && *errp) {
1351            goto error;
1352        }
1353    }
1354
1355    block->fd = fd;
1356    return area;
1357
1358error:
1359    if (area != MAP_FAILED) {
1360        qemu_ram_munmap(area, memory);
1361    }
1362    if (unlink_on_error) {
1363        unlink(path);
1364    }
1365    if (fd != -1) {
1366        close(fd);
1367    }
1368    return NULL;
1369}
1370#endif
1371
1372/* Called with the ramlist lock held.  */
1373static ram_addr_t find_ram_offset(ram_addr_t size)
1374{
1375    RAMBlock *block, *next_block;
1376    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1377
 1378    assert(size != 0); /* it would hand out the same offset multiple times */
1379
1380    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1381        return 0;
1382    }
1383
1384    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1385        ram_addr_t end, next = RAM_ADDR_MAX;
1386
1387        end = block->offset + block->max_length;
1388
1389        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1390            if (next_block->offset >= end) {
1391                next = MIN(next, next_block->offset);
1392            }
1393        }
1394        if (next - end >= size && next - end < mingap) {
1395            offset = end;
1396            mingap = next - end;
1397        }
1398    }
1399
1400    if (offset == RAM_ADDR_MAX) {
1401        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1402                (uint64_t)size);
1403        abort();
1404    }
1405
1406    return offset;
1407}
1408
1409ram_addr_t last_ram_offset(void)
1410{
1411    RAMBlock *block;
1412    ram_addr_t last = 0;
1413
1414    rcu_read_lock();
1415    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1416        last = MAX(last, block->offset + block->max_length);
1417    }
1418    rcu_read_unlock();
1419    return last;
1420}
1421
1422static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1423{
1424    int ret;
1425
 1426    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1427    if (!machine_dump_guest_core(current_machine)) {
1428        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1429        if (ret) {
1430            perror("qemu_madvise");
1431            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1432                            "but dump_guest_core=off specified\n");
1433        }
1434    }
1435}
1436
1437const char *qemu_ram_get_idstr(RAMBlock *rb)
1438{
1439    return rb->idstr;
1440}
1441
1442/* Called with iothread lock held.  */
1443void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1444{
1445    RAMBlock *block;
1446
1447    assert(new_block);
1448    assert(!new_block->idstr[0]);
1449
1450    if (dev) {
1451        char *id = qdev_get_dev_path(dev);
1452        if (id) {
1453            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1454            g_free(id);
1455        }
1456    }
1457    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1458
1459    rcu_read_lock();
1460    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1461        if (block != new_block &&
1462            !strcmp(block->idstr, new_block->idstr)) {
1463            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1464                    new_block->idstr);
1465            abort();
1466        }
1467    }
1468    rcu_read_unlock();
1469}
1470
1471/* Called with iothread lock held.  */
1472void qemu_ram_unset_idstr(RAMBlock *block)
1473{
1474    /* FIXME: arch_init.c assumes that this is not called throughout
1475     * migration.  Ignore the problem since hot-unplug during migration
1476     * does not work anyway.
1477     */
1478    if (block) {
1479        memset(block->idstr, 0, sizeof(block->idstr));
1480    }
1481}
1482
1483size_t qemu_ram_pagesize(RAMBlock *rb)
1484{
1485    return rb->page_size;
1486}
1487
1488static int memory_try_enable_merging(void *addr, size_t len)
1489{
1490    if (!machine_mem_merge(current_machine)) {
1491        /* disabled by the user */
1492        return 0;
1493    }
1494
1495    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1496}
1497
 1498/* Only legal before the guest might have detected the memory size: e.g. on
 1499 * incoming migration, or right after reset.
 1500 *
 1501 * As the memory core doesn't know how the memory is accessed, it is up to
 1502 * the resize callback to update device state and/or add assertions to detect
 1503 * misuse, if necessary.
 1504 */
1505int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1506{
1507    assert(block);
1508
1509    newsize = HOST_PAGE_ALIGN(newsize);
1510
1511    if (block->used_length == newsize) {
1512        return 0;
1513    }
1514
1515    if (!(block->flags & RAM_RESIZEABLE)) {
1516        error_setg_errno(errp, EINVAL,
1517                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1518                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1519                         newsize, block->used_length);
1520        return -EINVAL;
1521    }
1522
1523    if (block->max_length < newsize) {
1524        error_setg_errno(errp, EINVAL,
1525                         "Length too large: %s: 0x" RAM_ADDR_FMT
1526                         " > 0x" RAM_ADDR_FMT, block->idstr,
1527                         newsize, block->max_length);
1528        return -EINVAL;
1529    }
1530
1531    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1532    block->used_length = newsize;
1533    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1534                                        DIRTY_CLIENTS_ALL);
1535    memory_region_set_size(block->mr, newsize);
1536    if (block->resized) {
1537        block->resized(block->idstr, newsize, block->host);
1538    }
1539    return 0;
1540}
1541
1542/* Called with ram_list.mutex held */
1543static void dirty_memory_extend(ram_addr_t old_ram_size,
1544                                ram_addr_t new_ram_size)
1545{
1546    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1547                                             DIRTY_MEMORY_BLOCK_SIZE);
1548    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1549                                             DIRTY_MEMORY_BLOCK_SIZE);
1550    int i;
1551
1552    /* Only need to extend if block count increased */
1553    if (new_num_blocks <= old_num_blocks) {
1554        return;
1555    }
1556
1557    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1558        DirtyMemoryBlocks *old_blocks;
1559        DirtyMemoryBlocks *new_blocks;
1560        int j;
1561
1562        old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1563        new_blocks = g_malloc(sizeof(*new_blocks) +
1564                              sizeof(new_blocks->blocks[0]) * new_num_blocks);
1565
1566        if (old_num_blocks) {
1567            memcpy(new_blocks->blocks, old_blocks->blocks,
1568                   old_num_blocks * sizeof(old_blocks->blocks[0]));
1569        }
1570
1571        for (j = old_num_blocks; j < new_num_blocks; j++) {
1572            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1573        }
1574
1575        atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1576
1577        if (old_blocks) {
1578            g_free_rcu(old_blocks, rcu);
1579        }
1580    }
1581}
1582
1583static void ram_block_add(RAMBlock *new_block, Error **errp)
1584{
1585    RAMBlock *block;
1586    RAMBlock *last_block = NULL;
1587    ram_addr_t old_ram_size, new_ram_size;
1588    Error *err = NULL;
1589
1590    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1591
1592    qemu_mutex_lock_ramlist();
1593    new_block->offset = find_ram_offset(new_block->max_length);
1594
1595    if (!new_block->host) {
1596        if (xen_enabled()) {
1597            xen_ram_alloc(new_block->offset, new_block->max_length,
1598                          new_block->mr, &err);
1599            if (err) {
1600                error_propagate(errp, err);
1601                qemu_mutex_unlock_ramlist();
1602                return;
1603            }
1604        } else {
1605            new_block->host = phys_mem_alloc(new_block->max_length,
1606                                             &new_block->mr->align);
1607            if (!new_block->host) {
1608                error_setg_errno(errp, errno,
1609                                 "cannot set up guest memory '%s'",
1610                                 memory_region_name(new_block->mr));
1611                qemu_mutex_unlock_ramlist();
1612                return;
1613            }
1614            memory_try_enable_merging(new_block->host, new_block->max_length);
1615        }
1616    }
1617
1618    new_ram_size = MAX(old_ram_size,
1619              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1620    if (new_ram_size > old_ram_size) {
1621        migration_bitmap_extend(old_ram_size, new_ram_size);
1622        dirty_memory_extend(old_ram_size, new_ram_size);
1623    }
1624    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1625     * QLIST (which has an RCU-friendly variant) does not have insertion at
1626     * tail, so save the last element in last_block.
1627     */
1628    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1629        last_block = block;
1630        if (block->max_length < new_block->max_length) {
1631            break;
1632        }
1633    }
1634    if (block) {
1635        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1636    } else if (last_block) {
1637        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1638    } else { /* list is empty */
1639        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1640    }
1641    ram_list.mru_block = NULL;
1642
1643    /* Write list before version */
1644    smp_wmb();
1645    ram_list.version++;
1646    qemu_mutex_unlock_ramlist();
1647
1648    cpu_physical_memory_set_dirty_range(new_block->offset,
1649                                        new_block->used_length,
1650                                        DIRTY_CLIENTS_ALL);
1651
1652    if (new_block->host) {
1653        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1654        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
 1655        /* MADV_DONTFORK is also needed by KVM in the absence of a synchronous MMU */
1656        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1657    }
1658}
1659
1660#ifdef __linux__
1661RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1662                                   bool share, const char *mem_path,
1663                                   Error **errp)
1664{
1665    RAMBlock *new_block;
1666    Error *local_err = NULL;
1667
1668    if (xen_enabled()) {
1669        error_setg(errp, "-mem-path not supported with Xen");
1670        return NULL;
1671    }
1672
1673    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1674        /*
1675         * file_ram_alloc() needs to allocate just like
1676         * phys_mem_alloc, but we haven't bothered to provide
1677         * a hook there.
1678         */
1679        error_setg(errp,
1680                   "-mem-path not supported with this accelerator");
1681        return NULL;
1682    }
1683
1684    size = HOST_PAGE_ALIGN(size);
1685    new_block = g_malloc0(sizeof(*new_block));
1686    new_block->mr = mr;
1687    new_block->used_length = size;
1688    new_block->max_length = size;
1689    new_block->flags = share ? RAM_SHARED : 0;
1690    new_block->host = file_ram_alloc(new_block, size,
1691                                     mem_path, errp);
1692    if (!new_block->host) {
1693        g_free(new_block);
1694        return NULL;
1695    }
1696
1697    ram_block_add(new_block, &local_err);
1698    if (local_err) {
1699        g_free(new_block);
1700        error_propagate(errp, local_err);
1701        return NULL;
1702    }
1703    return new_block;
1704}
1705#endif
1706
1707static
1708RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1709                                  void (*resized)(const char*,
1710                                                  uint64_t length,
1711                                                  void *host),
1712                                  void *host, bool resizeable,
1713                                  MemoryRegion *mr, Error **errp)
1714{
1715    RAMBlock *new_block;
1716    Error *local_err = NULL;
1717
1718    size = HOST_PAGE_ALIGN(size);
1719    max_size = HOST_PAGE_ALIGN(max_size);
1720    new_block = g_malloc0(sizeof(*new_block));
1721    new_block->mr = mr;
1722    new_block->resized = resized;
1723    new_block->used_length = size;
1724    new_block->max_length = max_size;
1725    assert(max_size >= size);
1726    new_block->fd = -1;
1727    new_block->page_size = getpagesize();
1728    new_block->host = host;
1729    if (host) {
1730        new_block->flags |= RAM_PREALLOC;
1731    }
1732    if (resizeable) {
1733        new_block->flags |= RAM_RESIZEABLE;
1734    }
1735    ram_block_add(new_block, &local_err);
1736    if (local_err) {
1737        g_free(new_block);
1738        error_propagate(errp, local_err);
1739        return NULL;
1740    }
1741    return new_block;
1742}
1743
1744RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1745                                   MemoryRegion *mr, Error **errp)
1746{
1747    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1748}
1749
1750RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1751{
1752    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1753}
1754
1755RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1756                                     void (*resized)(const char*,
1757                                                     uint64_t length,
1758                                                     void *host),
1759                                     MemoryRegion *mr, Error **errp)
1760{
1761    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1762}
1763
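/*
 * Free the host memory backing a RAMBlock once no RCU reader can still be
 * using it: RAM_PREALLOC memory is owned by the caller, Xen mappings are
 * dropped from the map cache, file-backed blocks are munmap'ed and their fd
 * closed, and everything else goes back through qemu_anon_ram_free().
 */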
1764static void reclaim_ramblock(RAMBlock *block)
1765{
1766    if (block->flags & RAM_PREALLOC) {
1767        ;
1768    } else if (xen_enabled()) {
1769        xen_invalidate_map_cache_entry(block->host);
1770#ifndef _WIN32
1771    } else if (block->fd >= 0) {
1772        qemu_ram_munmap(block->host, block->max_length);
1773        close(block->fd);
1774#endif
1775    } else {
1776        qemu_anon_ram_free(block->host, block->max_length);
1777    }
1778    g_free(block);
1779}
1780
1781void qemu_ram_free(RAMBlock *block)
1782{
1783    if (!block) {
1784        return;
1785    }
1786
1787    qemu_mutex_lock_ramlist();
1788    QLIST_REMOVE_RCU(block, next);
1789    ram_list.mru_block = NULL;
1790    /* Write list before version */
1791    smp_wmb();
1792    ram_list.version++;
1793    call_rcu(block, reclaim_ramblock, rcu);
1794    qemu_mutex_unlock_ramlist();
1795}
1796
1797#ifndef _WIN32
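/*
 * Re-create the host mapping for the RAM pages containing
 * [addr, addr + length) at the same virtual address, using mmap(MAP_FIXED)
 * with parameters matching the original allocation (file-backed shared or
 * private mapping, otherwise anonymous).  Typically used to replace a host
 * page that has become unusable, e.g. after a hardware memory error.
 */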
1798void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1799{
1800    RAMBlock *block;
1801    ram_addr_t offset;
1802    int flags;
1803    void *area, *vaddr;
1804
1805    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1806        offset = addr - block->offset;
1807        if (offset < block->max_length) {
1808            vaddr = ramblock_ptr(block, offset);
1809            if (block->flags & RAM_PREALLOC) {
1810                ;
1811            } else if (xen_enabled()) {
1812                abort();
1813            } else {
1814                flags = MAP_FIXED;
1815                if (block->fd >= 0) {
1816                    flags |= (block->flags & RAM_SHARED ?
1817                              MAP_SHARED : MAP_PRIVATE);
1818                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1819                                flags, block->fd, offset);
1820                } else {
1821                    /*
1822                     * Remap needs to match alloc.  Accelerators that
1823                     * set phys_mem_alloc never remap.  If they did,
1824                     * we'd need a remap hook here.
1825                     */
1826                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1827
1828                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1829                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1830                                flags, -1, 0);
1831                }
1832                if (area != vaddr) {
1833                    fprintf(stderr, "Could not remap addr: "
1834                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1835                            length, addr);
1836                    exit(1);
1837                }
1838                memory_try_enable_merging(vaddr, length);
1839                qemu_ram_setup_dump(vaddr, length);
1840            }
1841        }
1842    }
1843}
1844#endif /* !_WIN32 */
1845
1846/* Return a host pointer to ram allocated with qemu_ram_alloc.
1847 * This should not be used for general purpose DMA.  Use address_space_map
1848 * or address_space_rw instead. For local memory (e.g. video ram) that the
1849 * device owns, use memory_region_get_ram_ptr.
1850 *
1851 * Called within RCU critical section.
1852 */
1853void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1854{
1855    RAMBlock *block = ram_block;
1856
1857    if (block == NULL) {
1858        block = qemu_get_ram_block(addr);
1859        addr -= block->offset;
1860    }
1861
1862    if (xen_enabled() && block->host == NULL) {
1863        /* We need to check whether the requested address is in RAM,
1864         * because we don't want to map the whole guest memory in QEMU.
1865         * In that case, just map up to the end of the page.
1866         */
1867        if (block->offset == 0) {
1868            return xen_map_cache(addr, 0, 0);
1869        }
1870
1871        block->host = xen_map_cache(block->offset, block->max_length, 1);
1872    }
1873    return ramblock_ptr(block, addr);
1874}
1875
1876/* Return a host pointer to guest RAM. Similar to qemu_map_ram_ptr
1877 * but takes a size argument.
1878 *
1879 * Called within RCU critical section.
1880 */
1881static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1882                                 hwaddr *size)
1883{
1884    RAMBlock *block = ram_block;
1885    if (*size == 0) {
1886        return NULL;
1887    }
1888
1889    if (block == NULL) {
1890        block = qemu_get_ram_block(addr);
1891        addr -= block->offset;
1892    }
1893    *size = MIN(*size, block->max_length - addr);
1894
1895    if (xen_enabled() && block->host == NULL) {
1896        /* We need to check whether the requested address is in RAM,
1897         * because we don't want to map the whole guest memory in QEMU.
1898         * In that case, just map the requested area.
1899         */
1900        if (block->offset == 0) {
1901            return xen_map_cache(addr, *size, 1);
1902        }
1903
1904        block->host = xen_map_cache(block->offset, block->max_length, 1);
1905    }
1906
1907    return ramblock_ptr(block, addr);
1908}
1909
1910/*
1911 * Translates a host ptr back to a RAMBlock and an offset within
1912 * that RAMBlock.
1913 *
1914 * ptr: Host pointer to look up
1915 * round_offset: If true round the result offset down to a page boundary
1917 * *offset: set to result offset within the RAMBlock
1918 *
1919 * Returns: RAMBlock (or NULL if not found)
1920 *
1921 * By the time this function returns, the returned pointer is not protected
1922 * by RCU anymore.  If the caller is not within an RCU critical section and
1923 * does not hold the iothread lock, it must have other means of protecting the
1924 * pointer, such as a reference to the region that includes the incoming
1925 * ram_addr_t.
1926 */
1927RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1928                                   ram_addr_t *offset)
1929{
1930    RAMBlock *block;
1931    uint8_t *host = ptr;
1932
1933    if (xen_enabled()) {
1934        ram_addr_t ram_addr;
1935        rcu_read_lock();
1936        ram_addr = xen_ram_addr_from_mapcache(ptr);
1937        block = qemu_get_ram_block(ram_addr);
1938        if (block) {
1939            *offset = ram_addr - block->offset;
1940        }
1941        rcu_read_unlock();
1942        return block;
1943    }
1944
1945    rcu_read_lock();
1946    block = atomic_rcu_read(&ram_list.mru_block);
1947    if (block && block->host && host - block->host < block->max_length) {
1948        goto found;
1949    }
1950
1951    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1952        /* This case occurs when the block is not mapped. */
1953        if (block->host == NULL) {
1954            continue;
1955        }
1956        if (host - block->host < block->max_length) {
1957            goto found;
1958        }
1959    }
1960
1961    rcu_read_unlock();
1962    return NULL;
1963
1964found:
1965    *offset = (host - block->host);
1966    if (round_offset) {
1967        *offset &= TARGET_PAGE_MASK;
1968    }
1969    rcu_read_unlock();
1970    return block;
1971}
1972
1973/*
1974 * Finds the named RAMBlock
1975 *
1976 * name: The name of RAMBlock to find
1977 *
1978 * Returns: RAMBlock (or NULL if not found)
1979 */
1980RAMBlock *qemu_ram_block_by_name(const char *name)
1981{
1982    RAMBlock *block;
1983
1984    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1985        if (!strcmp(name, block->idstr)) {
1986            return block;
1987        }
1988    }
1989
1990    return NULL;
1991}
1992
1993/* Some of the softmmu routines need to translate from a host pointer
1994   (typically a TLB entry) back to a ram offset.  */
1995ram_addr_t qemu_ram_addr_from_host(void *ptr)
1996{
1997    RAMBlock *block;
1998    ram_addr_t offset;
1999
2000    block = qemu_ram_block_from_host(ptr, false, &offset);
2001    if (!block) {
2002        return RAM_ADDR_INVALID;
2003    }
2004
2005    return block->offset + offset;
2006}
2007
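/*
 * Write handler for RAM pages whose TLB entries are flagged as not-dirty:
 * if the page may still contain translated code, the affected TBs are
 * invalidated under tb_lock before the store is performed; the page's dirty
 * bits are then set so that, once it is no longer clean, the TLB entry is
 * switched back to the fast direct-RAM path.
 */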
2008/* Called within RCU critical section.  */
2009static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2010                               uint64_t val, unsigned size)
2011{
2012    bool locked = false;
2013
2014    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2015        locked = true;
2016        tb_lock();
2017        tb_invalidate_phys_page_fast(ram_addr, size);
2018    }
2019    switch (size) {
2020    case 1:
2021        stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2022        break;
2023    case 2:
2024        stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2025        break;
2026    case 4:
2027        stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2028        break;
2029    default:
2030        abort();
2031    }
2032
2033    if (locked) {
2034        tb_unlock();
2035    }
2036
2037    /* Set both VGA and migration bits for simplicity and to remove
2038     * the notdirty callback faster.
2039     */
2040    cpu_physical_memory_set_dirty_range(ram_addr, size,
2041                                        DIRTY_CLIENTS_NOCODE);
2042    /* we remove the notdirty callback only if the code has been
2043       flushed */
2044    if (!cpu_physical_memory_is_clean(ram_addr)) {
2045        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2046    }
2047}
2048
2049static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2050                                 unsigned size, bool is_write)
2051{
2052    return is_write;
2053}
2054
2055static const MemoryRegionOps notdirty_mem_ops = {
2056    .write = notdirty_mem_write,
2057    .valid.accepts = notdirty_mem_accepts,
2058    .endianness = DEVICE_NATIVE_ENDIAN,
2059};
2060
2061/* Generate a debug exception if a watchpoint has been hit.  */
2062static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2063{
2064    CPUState *cpu = current_cpu;
2065    CPUClass *cc = CPU_GET_CLASS(cpu);
2066    CPUArchState *env = cpu->env_ptr;
2067    target_ulong pc, cs_base;
2068    target_ulong vaddr;
2069    CPUWatchpoint *wp;
2070    uint32_t cpu_flags;
2071
2072    if (cpu->watchpoint_hit) {
2073        /* We re-entered the check after replacing the TB. Now raise
2074         * the debug interrupt so that it will trigger after the
2075         * current instruction. */
2076        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2077        return;
2078    }
2079    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2080    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2081        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2082            && (wp->flags & flags)) {
2083            if (flags == BP_MEM_READ) {
2084                wp->flags |= BP_WATCHPOINT_HIT_READ;
2085            } else {
2086                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2087            }
2088            wp->hitaddr = vaddr;
2089            wp->hitattrs = attrs;
2090            if (!cpu->watchpoint_hit) {
2091                if (wp->flags & BP_CPU &&
2092                    !cc->debug_check_watchpoint(cpu, wp)) {
2093                    wp->flags &= ~BP_WATCHPOINT_HIT;
2094                    continue;
2095                }
2096                cpu->watchpoint_hit = wp;
2097
2098                /* The tb_lock will be reset when cpu_loop_exit or
2099                 * cpu_loop_exit_noexc longjmps back into the cpu_exec
2100                 * main loop.
2101                 */
2102                tb_lock();
2103                tb_check_watchpoint(cpu);
2104                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2105                    cpu->exception_index = EXCP_DEBUG;
2106                    cpu_loop_exit(cpu);
2107                } else {
2108                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2109                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2110                    cpu_loop_exit_noexc(cpu);
2111                }
2112            }
2113        } else {
2114            wp->flags &= ~BP_WATCHPOINT_HIT;
2115        }
2116    }
2117}
2118
2119/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2120   so these check for a hit then pass through to the normal out-of-line
2121   phys routines.  */
2122static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2123                                  unsigned size, MemTxAttrs attrs)
2124{
2125    MemTxResult res;
2126    uint64_t data;
2127    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2128    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2129
2130    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2131    switch (size) {
2132    case 1:
2133        data = address_space_ldub(as, addr, attrs, &res);
2134        break;
2135    case 2:
2136        data = address_space_lduw(as, addr, attrs, &res);
2137        break;
2138    case 4:
2139        data = address_space_ldl(as, addr, attrs, &res);
2140        break;
2141    default: abort();
2142    }
2143    *pdata = data;
2144    return res;
2145}
2146
2147static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2148                                   uint64_t val, unsigned size,
2149                                   MemTxAttrs attrs)
2150{
2151    MemTxResult res;
2152    int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2153    AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2154
2155    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2156    switch (size) {
2157    case 1:
2158        address_space_stb(as, addr, val, attrs, &res);
2159        break;
2160    case 2:
2161        address_space_stw(as, addr, val, attrs, &res);
2162        break;
2163    case 4:
2164        address_space_stl(as, addr, val, attrs, &res);
2165        break;
2166    default: abort();
2167    }
2168    return res;
2169}
2170
2171static const MemoryRegionOps watch_mem_ops = {
2172    .read_with_attrs = watch_mem_read,
2173    .write_with_attrs = watch_mem_write,
2174    .endianness = DEVICE_NATIVE_ENDIAN,
2175};
2176
2177static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2178                                unsigned len, MemTxAttrs attrs)
2179{
2180    subpage_t *subpage = opaque;
2181    uint8_t buf[8];
2182    MemTxResult res;
2183
2184#if defined(DEBUG_SUBPAGE)
2185    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2186           subpage, len, addr);
2187#endif
2188    res = address_space_read(subpage->as, addr + subpage->base,
2189                             attrs, buf, len);
2190    if (res) {
2191        return res;
2192    }
2193    switch (len) {
2194    case 1:
2195        *data = ldub_p(buf);
2196        return MEMTX_OK;
2197    case 2:
2198        *data = lduw_p(buf);
2199        return MEMTX_OK;
2200    case 4:
2201        *data = ldl_p(buf);
2202        return MEMTX_OK;
2203    case 8:
2204        *data = ldq_p(buf);
2205        return MEMTX_OK;
2206    default:
2207        abort();
2208    }
2209}
2210
2211static MemTxResult subpage_write(void *opaque, hwaddr addr,
2212                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2213{
2214    subpage_t *subpage = opaque;
2215    uint8_t buf[8];
2216
2217#if defined(DEBUG_SUBPAGE)
2218    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2219           " value %"PRIx64"\n",
2220           __func__, subpage, len, addr, value);
2221#endif
2222    switch (len) {
2223    case 1:
2224        stb_p(buf, value);
2225        break;
2226    case 2:
2227        stw_p(buf, value);
2228        break;
2229    case 4:
2230        stl_p(buf, value);
2231        break;
2232    case 8:
2233        stq_p(buf, value);
2234        break;
2235    default:
2236        abort();
2237    }
2238    return address_space_write(subpage->as, addr + subpage->base,
2239                               attrs, buf, len);
2240}
2241
2242static bool subpage_accepts(void *opaque, hwaddr addr,
2243                            unsigned len, bool is_write)
2244{
2245    subpage_t *subpage = opaque;
2246#if defined(DEBUG_SUBPAGE)
2247    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2248           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2249#endif
2250
2251    return address_space_access_valid(subpage->as, addr + subpage->base,
2252                                      len, is_write);
2253}
2254
2255static const MemoryRegionOps subpage_ops = {
2256    .read_with_attrs = subpage_read,
2257    .write_with_attrs = subpage_write,
2258    .impl.min_access_size = 1,
2259    .impl.max_access_size = 8,
2260    .valid.min_access_size = 1,
2261    .valid.max_access_size = 8,
2262    .valid.accepts = subpage_accepts,
2263    .endianness = DEVICE_NATIVE_ENDIAN,
2264};
2265
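/*
 * Subpage handling: a subpage_t stands in for a target page whose contents
 * are split across several MemoryRegionSections.  sub_section[] maps each
 * sub-page index (SUBPAGE_IDX) to the section that owns it, and the
 * read/write callbacks above simply forward the access to the owning
 * address space at subpage->base + addr.
 */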
2266static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2267                             uint16_t section)
2268{
2269    int idx, eidx;
2270
2271    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2272        return -1;
2273    idx = SUBPAGE_IDX(start);
2274    eidx = SUBPAGE_IDX(end);
2275#if defined(DEBUG_SUBPAGE)
2276    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2277           __func__, mmio, start, end, idx, eidx, section);
2278#endif
2279    for (; idx <= eidx; idx++) {
2280        mmio->sub_section[idx] = section;
2281    }
2282
2283    return 0;
2284}
2285
2286static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2287{
2288    subpage_t *mmio;
2289
2290    mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2291    mmio->as = as;
2292    mmio->base = base;
2293    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2294                          NULL, TARGET_PAGE_SIZE);
2295    mmio->iomem.subpage = true;
2296#if defined(DEBUG_SUBPAGE)
2297    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2298           mmio, base, TARGET_PAGE_SIZE);
2299#endif
2300    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2301
2302    return mmio;
2303}
2304
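/*
 * Register a MemoryRegionSection covering the whole 64-bit address space for
 * one of the special I/O regions (unassigned, notdirty, ROM, watch), so that
 * it gets a fixed PHYS_SECTION_* index in the dispatch map being built.
 */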
2305static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2306                              MemoryRegion *mr)
2307{
2308    assert(as);
2309    MemoryRegionSection section = {
2310        .address_space = as,
2311        .mr = mr,
2312        .offset_within_address_space = 0,
2313        .offset_within_region = 0,
2314        .size = int128_2_64(),
2315    };
2316
2317    return phys_section_add(map, &section);
2318}
2319
2320MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2321{
2322    int asidx = cpu_asidx_from_attrs(cpu, attrs);
2323    CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2324    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2325    MemoryRegionSection *sections = d->map.sections;
2326
2327    return sections[index & ~TARGET_PAGE_MASK].mr;
2328}
2329
2330static void io_mem_init(void)
2331{
2332    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2333    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2334                          NULL, UINT64_MAX);
2335    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2336                          NULL, UINT64_MAX);
2337    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2338                          NULL, UINT64_MAX);
2339}
2340
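/*
 * MemoryListener callbacks that rebuild an AddressSpace's dispatch tree:
 * mem_begin() starts a fresh AddressSpaceDispatch seeded with the fixed
 * PHYS_SECTION_* entries, the region callbacks (mem_add) populate it, and
 * mem_commit() publishes it with atomic_rcu_set(), freeing the previous
 * dispatch only after an RCU grace period.
 */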
2341static void mem_begin(MemoryListener *listener)
2342{
2343    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2344    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2345    uint16_t n;
2346
2347    n = dummy_section(&d->map, as, &io_mem_unassigned);
2348    assert(n == PHYS_SECTION_UNASSIGNED);
2349    n = dummy_section(&d->map, as, &io_mem_notdirty);
2350    assert(n == PHYS_SECTION_NOTDIRTY);
2351    n = dummy_section(&d->map, as, &io_mem_rom);
2352    assert(n == PHYS_SECTION_ROM);
2353    n = dummy_section(&d->map, as, &io_mem_watch);
2354    assert(n == PHYS_SECTION_WATCH);
2355
2356    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2357    d->as = as;
2358    as->next_dispatch = d;
2359}
2360
2361static void address_space_dispatch_free(AddressSpaceDispatch *d)
2362{
2363    phys_sections_free(&d->map);
2364    g_free(d);
2365}
2366
2367static void mem_commit(MemoryListener *listener)
2368{
2369    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2370    AddressSpaceDispatch *cur = as->dispatch;
2371    AddressSpaceDispatch *next = as->next_dispatch;
2372
2373    phys_page_compact_all(next, next->map.nodes_nb);
2374
2375    atomic_rcu_set(&as->dispatch, next);
2376    if (cur) {
2377        call_rcu(cur, address_space_dispatch_free, rcu);
2378    }
2379}
2380
2381static void tcg_commit(MemoryListener *listener)
2382{
2383    CPUAddressSpace *cpuas;
2384    AddressSpaceDispatch *d;
2385
2386    /* since each CPU stores ram addresses in its TLB cache, we must
2387       reset the modified entries */
2388    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2389    cpu_reloading_memory_map();
2390    /* The CPU and TLB are protected by the iothread lock.
2391     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2392     * may have split the RCU critical section.
2393     */
2394    d = atomic_rcu_read(&cpuas->as->dispatch);
2395    atomic_rcu_set(&cpuas->memory_dispatch, d);
2396    tlb_flush(cpuas->cpu, 1);
2397}
2398
2399void address_space_init_dispatch(AddressSpace *as)
2400{
2401    as->dispatch = NULL;
2402    as->dispatch_listener = (MemoryListener) {
2403        .begin = mem_begin,
2404        .commit = mem_commit,
2405        .region_add = mem_add,
2406        .region_nop = mem_add,
2407        .priority = 0,
2408    };
2409    memory_listener_register(&as->dispatch_listener, as);
2410}
2411
2412void address_space_unregister(AddressSpace *as)
2413{
2414    memory_listener_unregister(&as->dispatch_listener);
2415}
2416
2417void address_space_destroy_dispatch(AddressSpace *as)
2418{
2419    AddressSpaceDispatch *d = as->dispatch;
2420
2421    atomic_rcu_set(&as->dispatch, NULL);
2422    if (d) {
2423        call_rcu(d, address_space_dispatch_free, rcu);
2424    }
2425}
2426
2427static void memory_map_init(void)
2428{
2429    system_memory = g_malloc(sizeof(*system_memory));
2430
2431    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2432    address_space_init(&address_space_memory, system_memory, "memory");
2433
2434    system_io = g_malloc(sizeof(*system_io));
2435    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2436                          65536);
2437    address_space_init(&address_space_io, system_io, "I/O");
2438}
2439
2440MemoryRegion *get_system_memory(void)
2441{
2442    return system_memory;
2443}
2444
2445MemoryRegion *get_system_io(void)
2446{
2447    return system_io;
2448}
2449
2450#endif /* !defined(CONFIG_USER_ONLY) */
2451
2452/* physical memory access (slow version, mainly for debug) */
2453#if defined(CONFIG_USER_ONLY)
2454int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2455                        uint8_t *buf, int len, int is_write)
2456{
2457    int l, flags;
2458    target_ulong page;
2459    void * p;
2460
2461    while (len > 0) {
2462        page = addr & TARGET_PAGE_MASK;
2463        l = (page + TARGET_PAGE_SIZE) - addr;
2464        if (l > len)
2465            l = len;
2466        flags = page_get_flags(page);
2467        if (!(flags & PAGE_VALID))
2468            return -1;
2469        if (is_write) {
2470            if (!(flags & PAGE_WRITE))
2471                return -1;
2472            /* XXX: this code should not depend on lock_user */
2473            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2474                return -1;
2475            memcpy(p, buf, l);
2476            unlock_user(p, addr, l);
2477        } else {
2478            if (!(flags & PAGE_READ))
2479                return -1;
2480            /* XXX: this code should not depend on lock_user */
2481            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2482                return -1;
2483            memcpy(buf, p, l);
2484            unlock_user(p, addr, 0);
2485        }
2486        len -= l;
2487        buf += l;
2488        addr += l;
2489    }
2490    return 0;
2491}
2492
2493#else
2494
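/*
 * After a direct write to guest RAM: invalidate any translated code covering
 * the written range (if its CODE dirty bit was still clean) and then mark
 * the range dirty for the remaining clients (VGA, migration, Xen).
 */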
2495static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2496                                     hwaddr length)
2497{
2498    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2499    addr += memory_region_get_ram_addr(mr);
2500
2501    /* No early return if dirty_log_mask is or becomes 0, because
2502     * cpu_physical_memory_set_dirty_range will still call
2503     * xen_modified_memory.
2504     */
2505    if (dirty_log_mask) {
2506        dirty_log_mask =
2507            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2508    }
2509    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2510        tb_lock();
2511        tb_invalidate_phys_range(addr, addr + length);
2512        tb_unlock();
2513        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2514    }
2515    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2516}
2517
2518static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2519{
2520    unsigned access_size_max = mr->ops->valid.max_access_size;
2521
2522    /* Regions are assumed to support 1-4 byte accesses unless
2523       otherwise specified.  */
2524    if (access_size_max == 0) {
2525        access_size_max = 4;
2526    }
2527
2528    /* Bound the maximum access by the alignment of the address.  */
2529    if (!mr->ops->impl.unaligned) {
2530        unsigned align_size_max = addr & -addr;
2531        if (align_size_max != 0 && align_size_max < access_size_max) {
2532            access_size_max = align_size_max;
2533        }
2534    }
2535
2536    /* Don't attempt accesses larger than the maximum.  */
2537    if (l > access_size_max) {
2538        l = access_size_max;
2539    }
2540    l = pow2floor(l);
2541
2542    return l;
2543}
2544
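/*
 * Take the iothread lock if this MMIO region requires the global lock and we
 * do not hold it yet, and flush coalesced MMIO first if needed.  Returns
 * true when the caller must drop the lock again after the access.
 */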
2545static bool prepare_mmio_access(MemoryRegion *mr)
2546{
2547    bool unlocked = !qemu_mutex_iothread_locked();
2548    bool release_lock = false;
2549
2550    if (unlocked && mr->global_locking) {
2551        qemu_mutex_lock_iothread();
2552        unlocked = false;
2553        release_lock = true;
2554    }
2555    if (mr->flush_coalesced_mmio) {
2556        if (unlocked) {
2557            qemu_mutex_lock_iothread();
2558        }
2559        qemu_flush_coalesced_mmio_buffer();
2560        if (unlocked) {
2561            qemu_mutex_unlock_iothread();
2562        }
2563    }
2564
2565    return release_lock;
2566}
2567
2568/* Called within RCU critical section.  */
2569static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2570                                                MemTxAttrs attrs,
2571                                                const uint8_t *buf,
2572                                                int len, hwaddr addr1,
2573                                                hwaddr l, MemoryRegion *mr)
2574{
2575    uint8_t *ptr;
2576    uint64_t val;
2577    MemTxResult result = MEMTX_OK;
2578    bool release_lock = false;
2579
2580    for (;;) {
2581        if (!memory_access_is_direct(mr, true)) {
2582            release_lock |= prepare_mmio_access(mr);
2583            l = memory_access_size(mr, l, addr1);
2584            /* XXX: could force current_cpu to NULL to avoid
2585               potential bugs */
2586            switch (l) {
2587            case 8:
2588                /* 64 bit write access */
2589                val = ldq_p(buf);
2590                result |= memory_region_dispatch_write(mr, addr1, val, 8,
2591                                                       attrs);
2592                break;
2593            case 4:
2594                /* 32 bit write access */
2595                val = ldl_p(buf);
2596                result |= memory_region_dispatch_write(mr, addr1, val, 4,
2597                                                       attrs);
2598                break;
2599            case 2:
2600                /* 16 bit write access */
2601                val = lduw_p(buf);
2602                result |= memory_region_dispatch_write(mr, addr1, val, 2,
2603                                                       attrs);
2604                break;
2605            case 1:
2606                /* 8 bit write access */
2607                val = ldub_p(buf);
2608                result |= memory_region_dispatch_write(mr, addr1, val, 1,
2609                                                       attrs);
2610                break;
2611            default:
2612                abort();
2613            }
2614        } else {
2615            /* RAM case */
2616            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2617            memcpy(ptr, buf, l);
2618            invalidate_and_set_dirty(mr, addr1, l);
2619        }
2620
2621        if (release_lock) {
2622            qemu_mutex_unlock_iothread();
2623            release_lock = false;
2624        }
2625
2626        len -= l;
2627        buf += l;
2628        addr += l;
2629
2630        if (!len) {
2631            break;
2632        }
2633
2634        l = len;
2635        mr = address_space_translate(as, addr, &addr1, &l, true);
2636    }
2637
2638    return result;
2639}
2640
2641MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2642                                const uint8_t *buf, int len)
2643{
2644    hwaddr l;
2645    hwaddr addr1;
2646    MemoryRegion *mr;
2647    MemTxResult result = MEMTX_OK;
2648
2649    if (len > 0) {
2650        rcu_read_lock();
2651        l = len;
2652        mr = address_space_translate(as, addr, &addr1, &l, true);
2653        result = address_space_write_continue(as, addr, attrs, buf, len,
2654                                              addr1, l, mr);
2655        rcu_read_unlock();
2656    }
2657
2658    return result;
2659}
2660
2661/* Called within RCU critical section.  */
2662MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2663                                        MemTxAttrs attrs, uint8_t *buf,
2664                                        int len, hwaddr addr1, hwaddr l,
2665                                        MemoryRegion *mr)
2666{
2667    uint8_t *ptr;
2668    uint64_t val;
2669    MemTxResult result = MEMTX_OK;
2670    bool release_lock = false;
2671
2672    for (;;) {
2673        if (!memory_access_is_direct(mr, false)) {
2674            /* I/O case */
2675            release_lock |= prepare_mmio_access(mr);
2676            l = memory_access_size(mr, l, addr1);
2677            switch (l) {
2678            case 8:
2679                /* 64 bit read access */
2680                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2681                                                      attrs);
2682                stq_p(buf, val);
2683                break;
2684            case 4:
2685                /* 32 bit read access */
2686                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2687                                                      attrs);
2688                stl_p(buf, val);
2689                break;
2690            case 2:
2691                /* 16 bit read access */
2692                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2693                                                      attrs);
2694                stw_p(buf, val);
2695                break;
2696            case 1:
2697                /* 8 bit read access */
2698                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2699                                                      attrs);
2700                stb_p(buf, val);
2701                break;
2702            default:
2703                abort();
2704            }
2705        } else {
2706            /* RAM case */
2707            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2708            memcpy(buf, ptr, l);
2709        }
2710
2711        if (release_lock) {
2712            qemu_mutex_unlock_iothread();
2713            release_lock = false;
2714        }
2715
2716        len -= l;
2717        buf += l;
2718        addr += l;
2719
2720        if (!len) {
2721            break;
2722        }
2723
2724        l = len;
2725        mr = address_space_translate(as, addr, &addr1, &l, false);
2726    }
2727
2728    return result;
2729}
2730
2731MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2732                                    MemTxAttrs attrs, uint8_t *buf, int len)
2733{
2734    hwaddr l;
2735    hwaddr addr1;
2736    MemoryRegion *mr;
2737    MemTxResult result = MEMTX_OK;
2738
2739    if (len > 0) {
2740        rcu_read_lock();
2741        l = len;
2742        mr = address_space_translate(as, addr, &addr1, &l, false);
2743        result = address_space_read_continue(as, addr, attrs, buf, len,
2744                                             addr1, l, mr);
2745        rcu_read_unlock();
2746    }
2747
2748    return result;
2749}
2750
2751MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2752                             uint8_t *buf, int len, bool is_write)
2753{
2754    if (is_write) {
2755        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2756    } else {
2757        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2758    }
2759}
2760
2761void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2762                            int len, int is_write)
2763{
2764    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2765                     buf, len, is_write);
2766}
2767
2768enum write_rom_type {
2769    WRITE_DATA,
2770    FLUSH_CACHE,
2771};
2772
2773static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2774    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2775{
2776    hwaddr l;
2777    uint8_t *ptr;
2778    hwaddr addr1;
2779    MemoryRegion *mr;
2780
2781    rcu_read_lock();
2782    while (len > 0) {
2783        l = len;
2784        mr = address_space_translate(as, addr, &addr1, &l, true);
2785
2786        if (!(memory_region_is_ram(mr) ||
2787              memory_region_is_romd(mr))) {
2788            l = memory_access_size(mr, l, addr1);
2789        } else {
2790            /* ROM/RAM case */
2791            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2792            switch (type) {
2793            case WRITE_DATA:
2794                memcpy(ptr, buf, l);
2795                invalidate_and_set_dirty(mr, addr1, l);
2796                break;
2797            case FLUSH_CACHE:
2798                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2799                break;
2800            }
2801        }
2802        len -= l;
2803        buf += l;
2804        addr += l;
2805    }
2806    rcu_read_unlock();
2807}
2808
2809/* Used for ROM loading: can write to both RAM and ROM. */
2810void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2811                                   const uint8_t *buf, int len)
2812{
2813    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2814}
2815
2816void cpu_flush_icache_range(hwaddr start, int len)
2817{
2818    /*
2819     * This function should do the same thing as an icache flush that was
2820     * triggered from within the guest. For TCG we are always cache coherent,
2821     * so there is no need to flush anything. For KVM / Xen we need to flush
2822     * the host's instruction cache at least.
2823     */
2824    if (tcg_enabled()) {
2825        return;
2826    }
2827
2828    cpu_physical_memory_write_rom_internal(&address_space_memory,
2829                                           start, NULL, len, FLUSH_CACHE);
2830}
2831
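/*
 * Bounce buffer used by address_space_map() when the target is not directly
 * accessible RAM: the data is staged in this single global buffer, so only
 * one such mapping can exist at a time.  Clients registered with
 * cpu_register_map_client() have their bottom half scheduled when the buffer
 * is released, so they can retry the mapping.
 */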
2832typedef struct {
2833    MemoryRegion *mr;
2834    void *buffer;
2835    hwaddr addr;
2836    hwaddr len;
2837    bool in_use;
2838} BounceBuffer;
2839
2840static BounceBuffer bounce;
2841
2842typedef struct MapClient {
2843    QEMUBH *bh;
2844    QLIST_ENTRY(MapClient) link;
2845} MapClient;
2846
2847QemuMutex map_client_list_lock;
2848static QLIST_HEAD(map_client_list, MapClient) map_client_list
2849    = QLIST_HEAD_INITIALIZER(map_client_list);
2850
2851static void cpu_unregister_map_client_do(MapClient *client)
2852{
2853    QLIST_REMOVE(client, link);
2854    g_free(client);
2855}
2856
2857static void cpu_notify_map_clients_locked(void)
2858{
2859    MapClient *client;
2860
2861    while (!QLIST_EMPTY(&map_client_list)) {
2862        client = QLIST_FIRST(&map_client_list);
2863        qemu_bh_schedule(client->bh);
2864        cpu_unregister_map_client_do(client);
2865    }
2866}
2867
2868void cpu_register_map_client(QEMUBH *bh)
2869{
2870    MapClient *client = g_malloc(sizeof(*client));
2871
2872    qemu_mutex_lock(&map_client_list_lock);
2873    client->bh = bh;
2874    QLIST_INSERT_HEAD(&map_client_list, client, link);
2875    if (!atomic_read(&bounce.in_use)) {
2876        cpu_notify_map_clients_locked();
2877    }
2878    qemu_mutex_unlock(&map_client_list_lock);
2879}
2880
2881void cpu_exec_init_all(void)
2882{
2883    qemu_mutex_init(&ram_list.mutex);
2884    /* The data structures we set up here depend on knowing the page size,
2885     * so no more changes can be made after this point.
2886     * In an ideal world, nothing we did before we had finished the
2887     * machine setup would care about the target page size, and we could
2888     * do this much later, rather than requiring board models to state
2889     * up front what their requirements are.
2890     */
2891    finalize_target_page_bits();
2892    io_mem_init();
2893    memory_map_init();
2894    qemu_mutex_init(&map_client_list_lock);
2895}
2896
2897void cpu_unregister_map_client(QEMUBH *bh)
2898{
2899    MapClient *client;
2900
2901    qemu_mutex_lock(&map_client_list_lock);
2902    QLIST_FOREACH(client, &map_client_list, link) {
2903        if (client->bh == bh) {
2904            cpu_unregister_map_client_do(client);
2905            break;
2906        }
2907    }
2908    qemu_mutex_unlock(&map_client_list_lock);
2909}
2910
2911static void cpu_notify_map_clients(void)
2912{
2913    qemu_mutex_lock(&map_client_list_lock);
2914    cpu_notify_map_clients_locked();
2915    qemu_mutex_unlock(&map_client_list_lock);
2916}
2917
2918bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2919{
2920    MemoryRegion *mr;
2921    hwaddr l, xlat;
2922
2923    rcu_read_lock();
2924    while (len > 0) {
2925        l = len;
2926        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2927        if (!memory_access_is_direct(mr, is_write)) {
2928            l = memory_access_size(mr, l, addr);
2929            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2930                rcu_read_unlock();
2931                return false;
2932            }
2933        }
2934
2935        len -= l;
2936        addr += l;
2937    }
2938    rcu_read_unlock();
2939    return true;
2940}
2941
2942/* Map a physical memory region into a host virtual address.
2943 * May map a subset of the requested range, given by and returned in *plen.
2944 * May return NULL if resources needed to perform the mapping are exhausted.
2945 * Use only for reads OR writes - not for read-modify-write operations.
2946 * Use cpu_register_map_client() to know when retrying the map operation is
2947 * likely to succeed.
2948 */
2949void *address_space_map(AddressSpace *as,
2950                        hwaddr addr,
2951                        hwaddr *plen,
2952                        bool is_write)
2953{
2954    hwaddr len = *plen;
2955    hwaddr done = 0;
2956    hwaddr l, xlat, base;
2957    MemoryRegion *mr, *this_mr;
2958    void *ptr;
2959
2960    if (len == 0) {
2961        return NULL;
2962    }
2963
2964    l = len;
2965    rcu_read_lock();
2966    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2967
2968    if (!memory_access_is_direct(mr, is_write)) {
2969        if (atomic_xchg(&bounce.in_use, true)) {
2970            rcu_read_unlock();
2971            return NULL;
2972        }
2973        /* Avoid unbounded allocations */
2974        l = MIN(l, TARGET_PAGE_SIZE);
2975        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2976        bounce.addr = addr;
2977        bounce.len = l;
2978
2979        memory_region_ref(mr);
2980        bounce.mr = mr;
2981        if (!is_write) {
2982            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2983                               bounce.buffer, l);
2984        }
2985
2986        rcu_read_unlock();
2987        *plen = l;
2988        return bounce.buffer;
2989    }
2990
2991    base = xlat;
2992
2993    for (;;) {
2994        len -= l;
2995        addr += l;
2996        done += l;
2997        if (len == 0) {
2998            break;
2999        }
3000
3001        l = len;
3002        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3003        if (this_mr != mr || xlat != base + done) {
3004            break;
3005        }
3006    }
3007
3008    memory_region_ref(mr);
3009    *plen = done;
3010    ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3011    rcu_read_unlock();
3012
3013    return ptr;
3014}
3015
3016/* Unmaps a memory region previously mapped by address_space_map().
3017 * Will also mark the memory as dirty if is_write == 1.  access_len gives
3018 * the amount of memory that was actually read or written by the caller.
3019 */
3020void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3021                         int is_write, hwaddr access_len)
3022{
3023    if (buffer != bounce.buffer) {
3024        MemoryRegion *mr;
3025        ram_addr_t addr1;
3026
3027        mr = memory_region_from_host(buffer, &addr1);
3028        assert(mr != NULL);
3029        if (is_write) {
3030            invalidate_and_set_dirty(mr, addr1, access_len);
3031        }
3032        if (xen_enabled()) {
3033            xen_invalidate_map_cache_entry(buffer);
3034        }
3035        memory_region_unref(mr);
3036        return;
3037    }
3038    if (is_write) {
3039        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3040                            bounce.buffer, access_len);
3041    }
3042    qemu_vfree(bounce.buffer);
3043    bounce.buffer = NULL;
3044    memory_region_unref(bounce.mr);
3045    atomic_mb_set(&bounce.in_use, false);
3046    cpu_notify_map_clients();
3047}
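
/*
 * A typical usage pattern for the map/unmap pair above (a sketch only; the
 * names buf, data and size are illustrative):
 *
 *     hwaddr len = size;
 *     void *buf = address_space_map(as, addr, &len, true);
 *     if (buf) {
 *         memcpy(buf, data, len);          (only 'len' bytes are mapped)
 *         address_space_unmap(as, buf, len, true, len);
 *     }
 *
 * Callers must cope with a shortened length and with a NULL return (e.g.
 * when the bounce buffer is already in use); cpu_register_map_client() can
 * be used to learn when a retry is worth attempting.
 */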
3048
3049void *cpu_physical_memory_map(hwaddr addr,
3050                              hwaddr *plen,
3051                              int is_write)
3052{
3053    return address_space_map(&address_space_memory, addr, plen, is_write);
3054}
3055
3056void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3057                               int is_write, hwaddr access_len)
3058{
3059    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3060}
3061
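/*
 * The address_space_ld* and address_space_st* helpers below each perform a
 * single aligned load or store of the given width: they either dispatch to
 * the region's MMIO callbacks (byte-swapping when the requested endianness
 * differs from the target's) or access guest RAM directly through
 * qemu_map_ram_ptr(), with the store variants also updating the dirty
 * bitmap as appropriate.
 */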
3062/* warning: addr must be aligned */
3063static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3064                                                  MemTxAttrs attrs,
3065                                                  MemTxResult *result,
3066                                                  enum device_endian endian)
3067{
3068    uint8_t *ptr;
3069    uint64_t val;
3070    MemoryRegion *mr;
3071    hwaddr l = 4;
3072    hwaddr addr1;
3073    MemTxResult r;
3074    bool release_lock = false;
3075
3076    rcu_read_lock();
3077    mr = address_space_translate(as, addr, &addr1, &l, false);
3078    if (l < 4 || !memory_access_is_direct(mr, false)) {
3079        release_lock |= prepare_mmio_access(mr);
3080
3081        /* I/O case */
3082        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3083#if defined(TARGET_WORDS_BIGENDIAN)
3084        if (endian == DEVICE_LITTLE_ENDIAN) {
3085            val = bswap32(val);
3086        }
3087#else
3088        if (endian == DEVICE_BIG_ENDIAN) {
3089            val = bswap32(val);
3090        }
3091#endif
3092    } else {
3093        /* RAM case */
3094        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3095        switch (endian) {
3096        case DEVICE_LITTLE_ENDIAN:
3097            val = ldl_le_p(ptr);
3098            break;
3099        case DEVICE_BIG_ENDIAN:
3100            val = ldl_be_p(ptr);
3101            break;
3102        default:
3103            val = ldl_p(ptr);
3104            break;
3105        }
3106        r = MEMTX_OK;
3107    }
3108    if (result) {
3109        *result = r;
3110    }
3111    if (release_lock) {
3112        qemu_mutex_unlock_iothread();
3113    }
3114    rcu_read_unlock();
3115    return val;
3116}
3117
3118uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3119                           MemTxAttrs attrs, MemTxResult *result)
3120{
3121    return address_space_ldl_internal(as, addr, attrs, result,
3122                                      DEVICE_NATIVE_ENDIAN);
3123}
3124
3125uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3126                              MemTxAttrs attrs, MemTxResult *result)
3127{
3128    return address_space_ldl_internal(as, addr, attrs, result,
3129                                      DEVICE_LITTLE_ENDIAN);
3130}
3131
3132uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3133                              MemTxAttrs attrs, MemTxResult *result)
3134{
3135    return address_space_ldl_internal(as, addr, attrs, result,
3136                                      DEVICE_BIG_ENDIAN);
3137}
3138
3139uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3140{
3141    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3142}
3143
3144uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3145{
3146    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3147}
3148
3149uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3150{
3151    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3152}
3153
3154/* warning: addr must be aligned */
3155static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3156                                                  MemTxAttrs attrs,
3157                                                  MemTxResult *result,
3158                                                  enum device_endian endian)
3159{
3160    uint8_t *ptr;
3161    uint64_t val;
3162    MemoryRegion *mr;
3163    hwaddr l = 8;
3164    hwaddr addr1;
3165    MemTxResult r;
3166    bool release_lock = false;
3167
3168    rcu_read_lock();
3169    mr = address_space_translate(as, addr, &addr1, &l,
3170                                 false);
3171    if (l < 8 || !memory_access_is_direct(mr, false)) {
3172        release_lock |= prepare_mmio_access(mr);
3173
3174        /* I/O case */
3175        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3176#if defined(TARGET_WORDS_BIGENDIAN)
3177        if (endian == DEVICE_LITTLE_ENDIAN) {
3178            val = bswap64(val);
3179        }
3180#else
3181        if (endian == DEVICE_BIG_ENDIAN) {
3182            val = bswap64(val);
3183        }
3184#endif
3185    } else {
3186        /* RAM case */
3187        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3188        switch (endian) {
3189        case DEVICE_LITTLE_ENDIAN:
3190            val = ldq_le_p(ptr);
3191            break;
3192        case DEVICE_BIG_ENDIAN:
3193            val = ldq_be_p(ptr);
3194            break;
3195        default:
3196            val = ldq_p(ptr);
3197            break;
3198        }
3199        r = MEMTX_OK;
3200    }
3201    if (result) {
3202        *result = r;
3203    }
3204    if (release_lock) {
3205        qemu_mutex_unlock_iothread();
3206    }
3207    rcu_read_unlock();
3208    return val;
3209}
3210
3211uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3212                           MemTxAttrs attrs, MemTxResult *result)
3213{
3214    return address_space_ldq_internal(as, addr, attrs, result,
3215                                      DEVICE_NATIVE_ENDIAN);
3216}
3217
3218uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3219                           MemTxAttrs attrs, MemTxResult *result)
3220{
3221    return address_space_ldq_internal(as, addr, attrs, result,
3222                                      DEVICE_LITTLE_ENDIAN);
3223}
3224
3225uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3226                           MemTxAttrs attrs, MemTxResult *result)
3227{
3228    return address_space_ldq_internal(as, addr, attrs, result,
3229                                      DEVICE_BIG_ENDIAN);
3230}
3231
3232uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3233{
3234    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3235}
3236
3237uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3238{
3239    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3240}
3241
3242uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3243{
3244    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3245}
3246
3247/* XXX: optimize */
3248uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3249                            MemTxAttrs attrs, MemTxResult *result)
3250{
3251    uint8_t val;
3252    MemTxResult r;
3253
3254    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3255    if (result) {
3256        *result = r;
3257    }
3258    return val;
3259}
3260
3261uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3262{
3263    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3264}
3265
3266/* warning: addr must be aligned */
3267static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3268                                                   hwaddr addr,
3269                                                   MemTxAttrs attrs,
3270                                                   MemTxResult *result,
3271                                                   enum device_endian endian)
3272{
3273    uint8_t *ptr;
3274    uint64_t val;
3275    MemoryRegion *mr;
3276    hwaddr l = 2;
3277    hwaddr addr1;
3278    MemTxResult r;
3279    bool release_lock = false;
3280
3281    rcu_read_lock();
3282    mr = address_space_translate(as, addr, &addr1, &l,
3283                                 false);
3284    if (l < 2 || !memory_access_is_direct(mr, false)) {
3285        release_lock |= prepare_mmio_access(mr);
3286
3287        /* I/O case */
3288        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3289#if defined(TARGET_WORDS_BIGENDIAN)
3290        if (endian == DEVICE_LITTLE_ENDIAN) {
3291            val = bswap16(val);
3292        }
3293#else
3294        if (endian == DEVICE_BIG_ENDIAN) {
3295            val = bswap16(val);
3296        }
3297#endif
3298    } else {
3299        /* RAM case */
3300        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3301        switch (endian) {
3302        case DEVICE_LITTLE_ENDIAN:
3303            val = lduw_le_p(ptr);
3304            break;
3305        case DEVICE_BIG_ENDIAN:
3306            val = lduw_be_p(ptr);
3307            break;
3308        default:
3309            val = lduw_p(ptr);
3310            break;
3311        }
3312        r = MEMTX_OK;
3313    }
3314    if (result) {
3315        *result = r;
3316    }
3317    if (release_lock) {
3318        qemu_mutex_unlock_iothread();
3319    }
3320    rcu_read_unlock();
3321    return val;
3322}
3323
3324uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3325                           MemTxAttrs attrs, MemTxResult *result)
3326{
3327    return address_space_lduw_internal(as, addr, attrs, result,
3328                                       DEVICE_NATIVE_ENDIAN);
3329}
3330
3331uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3332                           MemTxAttrs attrs, MemTxResult *result)
3333{
3334    return address_space_lduw_internal(as, addr, attrs, result,
3335                                       DEVICE_LITTLE_ENDIAN);
3336}
3337
3338uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3339                           MemTxAttrs attrs, MemTxResult *result)
3340{
3341    return address_space_lduw_internal(as, addr, attrs, result,
3342                                       DEVICE_BIG_ENDIAN);
3343}
3344
3345uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3346{
3347    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3348}
3349
3350uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3351{
3352    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3353}
3354
3355uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3356{
3357    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3358}
3359
3360/* warning: addr must be aligned. The RAM page is not marked as dirty
3361   and the code inside is not invalidated. This is useful if the dirty
3362   bits are used to track modified PTEs. */
3363void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3364                                MemTxAttrs attrs, MemTxResult *result)
3365{
3366    uint8_t *ptr;
3367    MemoryRegion *mr;
3368    hwaddr l = 4;
3369    hwaddr addr1;
3370    MemTxResult r;
3371    uint8_t dirty_log_mask;
3372    bool release_lock = false;
3373
3374    rcu_read_lock();
3375    mr = address_space_translate(as, addr, &addr1, &l,
3376                                 true);
3377    if (l < 4 || !memory_access_is_direct(mr, true)) {
3378        release_lock |= prepare_mmio_access(mr);
3379
3380        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3381    } else {
3382        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3383        stl_p(ptr, val);
3384
3385        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3386        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3387        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3388                                            4, dirty_log_mask);
3389        r = MEMTX_OK;
3390    }
3391    if (result) {
3392        *result = r;
3393    }
3394    if (release_lock) {
3395        qemu_mutex_unlock_iothread();
3396    }
3397    rcu_read_unlock();
3398}
3399
3400void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3401{
3402    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3403}
3404
/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

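/*
 * Example (illustrative sketch): the _le/_be variants give a fixed byte
 * order in guest memory regardless of the target's endianness, which is
 * what most device models for little-endian hardware want.  Here
 * 0x11223344 lands in guest RAM as the bytes 44 33 22 11, and reading it
 * back with the matching ldl_le_phys() recovers the value.
 */
static inline bool example_stl_le_roundtrip(AddressSpace *as, hwaddr addr)
{
    stl_le_phys(as, addr, 0x11223344);
    return ldl_le_phys(as, addr) == 0x11223344;
}
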
/* XXX: optimize */
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

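/*
 * Example (illustrative sketch): byte stores need no endianness variants.
 * A small helper might poke a buffer into guest RAM one byte at a time
 * with stb_phys(), although for anything sizeable address_space_rw() or
 * cpu_physical_memory_write() is the better fit.
 */
static inline void example_store_bytes(AddressSpace *as, hwaddr addr,
                                       const uint8_t *buf, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        stb_phys(as, addr + i, buf[i]);
    }
}
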
/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

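/*
 * Example (illustrative sketch): a 16-bit store with an explicit byte
 * order.  A helper that builds a structure with network-byte-order
 * (big-endian) fields in guest memory could use stw_be_phys(),
 * independent of the target's own endianness.
 */
static inline void example_store_be16_length(AddressSpace *as, hwaddr addr,
                                             uint16_t len)
{
    stw_be_phys(as, addr, len);
}
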
/* XXX: optimize */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

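/*
 * Example (illustrative sketch): 64-bit stores are the natural fit for
 * DMA descriptor pointers.  A little-endian device model might publish a
 * buffer address into a guest-resident ring entry like this; the layout
 * (address at offset 0, flags at offset 8) is invented for the example.
 */
static inline void example_publish_desc(AddressSpace *as, hwaddr ring_entry,
                                        uint64_t buf_addr, uint16_t flags)
{
    stq_le_phys(as, ring_entry, buf_addr);
    stw_le_phys(as, ring_entry + 8, flags);
}
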
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

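/*
 * Example (illustrative sketch): this is the kind of call a debugger
 * front end (the gdbstub, a monitor command, ...) makes to peek at guest
 * virtual memory, going through the CPU's MMU rather than a physical
 * address space.  Returns true when the whole range was readable.
 */
static inline bool example_debug_read(CPUState *cpu, target_ulong vaddr,
                                      void *buf, int len)
{
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0) == 0;
}
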
/*
 * Allows code that needs to deal with migration bitmaps etc. to still be
 * built target-independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

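/*
 * Example (illustrative sketch): target-independent code can size a dirty
 * bitmap from this accessor instead of using TARGET_PAGE_SIZE directly.
 */
static inline uint64_t example_bitmap_pages(uint64_t ram_bytes)
{
    uint64_t page_size = (uint64_t)1 << qemu_target_page_bits();

    return (ram_bytes + page_size - 1) / page_size;
}
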
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big-endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

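/*
 * Example (illustrative sketch): the only sensible use of this hook is
 * deciding whether data has to be byte-swapped between a device model
 * with a fixed endianness and the target, e.g. a big-endian device only
 * matches the target's byte order when the target is big-endian too.
 */
static inline bool example_device_needs_bswap(bool device_is_big_endian)
{
    return device_is_big_endian != target_words_bigendian();
}
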
#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

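/*
 * Example (illustrative sketch): callers typically use this to decide
 * whether an address range can be treated as plain RAM or must go through
 * MMIO emulation.  The page-granular scan below assumes the range does
 * not straddle sub-page region boundaries.
 */
static inline bool example_range_is_ram(hwaddr start, hwaddr len)
{
    hwaddr a;

    for (a = start & TARGET_PAGE_MASK; a < start + len;
         a += TARGET_PAGE_SIZE) {
        if (cpu_physical_memory_is_io(a)) {
            return false;
        }
    }
    return true;
}
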
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
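
/*
 * Example (illustrative sketch): a callback for qemu_ram_foreach_block()
 * that adds up the used size of every RAM block.  The parameter list
 * mirrors the call above (block name, host pointer, offset, used length,
 * opaque); returning non-zero would stop the iteration early.
 * Typical use: uint64_t total = 0;
 *              qemu_ram_foreach_block(example_sum_ram_cb, &total);
 */
static inline int example_sum_ram_cb(const char *block_name, void *host_addr,
                                     ram_addr_t offset, ram_addr_t length,
                                     void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0;
}
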
#endif
