qemu/exec.c
   1/*
   2 *  Virtual page mapping
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "config.h"
  20#ifndef _WIN32
  21#include <sys/types.h>
  22#include <sys/mman.h>
  23#endif
  24
  25#include "qemu-common.h"
  26#include "cpu.h"
  27#include "tcg.h"
  28#include "hw/hw.h"
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/boards.h"
  31#endif
  32#include "hw/qdev.h"
  33#include "qemu/osdep.h"
  34#include "sysemu/kvm.h"
  35#include "sysemu/sysemu.h"
  36#include "hw/xen/xen.h"
  37#include "qemu/timer.h"
  38#include "qemu/config-file.h"
  39#include "qemu/error-report.h"
  40#include "exec/memory.h"
  41#include "sysemu/dma.h"
  42#include "exec/address-spaces.h"
  43#if defined(CONFIG_USER_ONLY)
  44#include <qemu.h>
  45#else /* !CONFIG_USER_ONLY */
  46#include "sysemu/xen-mapcache.h"
  47#include "trace.h"
  48#endif
  49#include "exec/cpu-all.h"
  50#include "qemu/rcu_queue.h"
  51#include "qemu/main-loop.h"
  52#include "translate-all.h"
  53#include "sysemu/replay.h"
  54
  55#include "exec/memory-internal.h"
  56#include "exec/ram_addr.h"
  57
  58#include "qemu/range.h"
  59#ifndef _WIN32
  60#include "qemu/mmap-alloc.h"
  61#endif
  62
  63//#define DEBUG_SUBPAGE
  64
  65#if !defined(CONFIG_USER_ONLY)
  66/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
  67 * are protected by the ramlist lock.
  68 */
  69RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
  70
  71static MemoryRegion *system_memory;
  72static MemoryRegion *system_io;
  73
  74AddressSpace address_space_io;
  75AddressSpace address_space_memory;
  76
  77MemoryRegion io_mem_rom, io_mem_notdirty;
  78static MemoryRegion io_mem_unassigned;
  79
  80/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
  81#define RAM_PREALLOC   (1 << 0)
  82
  83/* RAM is mmap-ed with MAP_SHARED */
  84#define RAM_SHARED     (1 << 1)
  85
  86/* Only a portion of RAM (used_length) is actually used, and migrated.
  87 * This used_length can change across reboots.
  88 */
  89#define RAM_RESIZEABLE (1 << 2)
  90
  91/* RAM is backed by an mmapped file.
  92 */
  93#define RAM_FILE (1 << 3)
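/* These flags are combined in a block's flags field: for example, a block
 * created by qemu_ram_alloc_from_file() with share=true carries
 * RAM_FILE | RAM_SHARED, while a block wrapping caller-provided memory
 * (qemu_ram_alloc_from_ptr) carries RAM_PREALLOC.
 */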
  94#endif
  95
  96struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  97/* current CPU in the current thread. It is only valid inside
  98   cpu_exec() */
  99__thread CPUState *current_cpu;
 100/* 0 = Do not count executed instructions.
 101   1 = Precise instruction counting.
 102   2 = Adaptive rate instruction counting.  */
 103int use_icount;
 104
 105#if !defined(CONFIG_USER_ONLY)
 106
 107typedef struct PhysPageEntry PhysPageEntry;
 108
 109struct PhysPageEntry {
 110    /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
 111    uint32_t skip : 6;
 112    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
 113    uint32_t ptr : 26;
 114};
 115
 116#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
 117
 118/* Size of the L2 (and L3, etc) page tables.  */
 119#define ADDR_SPACE_BITS 64
 120
 121#define P_L2_BITS 9
 122#define P_L2_SIZE (1 << P_L2_BITS)
 123
 124#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
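/* With 4 KiB target pages (TARGET_PAGE_BITS == 12), for example, this works
 * out to ((64 - 12 - 1) / 9) + 1 == 6 levels of 512-entry tables, enough to
 * cover the whole 64-bit address space.
 */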
 125
 126typedef PhysPageEntry Node[P_L2_SIZE];
 127
 128typedef struct PhysPageMap {
 129    struct rcu_head rcu;
 130
 131    unsigned sections_nb;
 132    unsigned sections_nb_alloc;
 133    unsigned nodes_nb;
 134    unsigned nodes_nb_alloc;
 135    Node *nodes;
 136    MemoryRegionSection *sections;
 137} PhysPageMap;
 138
 139struct AddressSpaceDispatch {
 140    struct rcu_head rcu;
 141
 142    /* This is a multi-level map on the physical address space.
 143     * The bottom level has pointers to MemoryRegionSections.
 144     */
 145    PhysPageEntry phys_map;
 146    PhysPageMap map;
 147    AddressSpace *as;
 148};
 149
 150#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 151typedef struct subpage_t {
 152    MemoryRegion iomem;
 153    AddressSpace *as;
 154    hwaddr base;
 155    uint16_t sub_section[TARGET_PAGE_SIZE];
 156} subpage_t;
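/* sub_section holds one section index per byte offset within the page, so
 * SUBPAGE_IDX(addr) picks the MemoryRegionSection that covers a given byte
 * of a page shared by several sections.
 */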
 157
 158#define PHYS_SECTION_UNASSIGNED 0
 159#define PHYS_SECTION_NOTDIRTY 1
 160#define PHYS_SECTION_ROM 2
 161#define PHYS_SECTION_WATCH 3
 162
 163static void io_mem_init(void);
 164static void memory_map_init(void);
 165static void tcg_commit(MemoryListener *listener);
 166
 167static MemoryRegion io_mem_watch;
 168
 169/**
 170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 171 * @cpu: the CPU whose AddressSpace this is
 172 * @as: the AddressSpace itself
 173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 175 */
 176struct CPUAddressSpace {
 177    CPUState *cpu;
 178    AddressSpace *as;
 179    struct AddressSpaceDispatch *memory_dispatch;
 180    MemoryListener tcg_as_listener;
 181};
 182
 183#endif
 184
 185#if !defined(CONFIG_USER_ONLY)
 186
 187static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 188{
 189    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
 190        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
 191        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
 192        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
 193    }
 194}
 195
 196static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
 197{
 198    unsigned i;
 199    uint32_t ret;
 200    PhysPageEntry e;
 201    PhysPageEntry *p;
 202
 203    ret = map->nodes_nb++;
 204    p = map->nodes[ret];
 205    assert(ret != PHYS_MAP_NODE_NIL);
 206    assert(ret != map->nodes_nb_alloc);
 207
 208    e.skip = leaf ? 0 : 1;
 209    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
 210    for (i = 0; i < P_L2_SIZE; ++i) {
 211        memcpy(&p[i], &e, sizeof(e));
 212    }
 213    return ret;
 214}
 215
 216static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
 217                                hwaddr *index, hwaddr *nb, uint16_t leaf,
 218                                int level)
 219{
 220    PhysPageEntry *p;
 221    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
 222
 223    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
 224        lp->ptr = phys_map_node_alloc(map, level == 0);
 225    }
 226    p = map->nodes[lp->ptr];
 227    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
 228
 229    while (*nb && lp < &p[P_L2_SIZE]) {
 230        if ((*index & (step - 1)) == 0 && *nb >= step) {
 231            lp->skip = 0;
 232            lp->ptr = leaf;
 233            *index += step;
 234            *nb -= step;
 235        } else {
 236            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
 237        }
 238        ++lp;
 239    }
 240}
 241
 242static void phys_page_set(AddressSpaceDispatch *d,
 243                          hwaddr index, hwaddr nb,
 244                          uint16_t leaf)
 245{
 246    /* Wildly overreserve - it doesn't matter much. */
 247    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 248
 249    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 250}
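/* For example, with 4 KiB pages a 2 MiB-aligned, 2 MiB-sized mapping
 * (nb == 512) is recorded by the recursion above as a single level-1 leaf
 * entry, rather than as 512 separate level-0 entries.
 */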
 251
 252/* Compact a non-leaf page entry: if the entry has a single child, update our
 253 * entry so we can skip it and go directly to the destination.
 254 */
 255static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
 256{
 257    unsigned valid_ptr = P_L2_SIZE;
 258    int valid = 0;
 259    PhysPageEntry *p;
 260    int i;
 261
 262    if (lp->ptr == PHYS_MAP_NODE_NIL) {
 263        return;
 264    }
 265
 266    p = nodes[lp->ptr];
 267    for (i = 0; i < P_L2_SIZE; i++) {
 268        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
 269            continue;
 270        }
 271
 272        valid_ptr = i;
 273        valid++;
 274        if (p[i].skip) {
 275            phys_page_compact(&p[i], nodes, compacted);
 276        }
 277    }
 278
 279    /* We can only compress if there's only one child. */
 280    if (valid != 1) {
 281        return;
 282    }
 283
 284    assert(valid_ptr < P_L2_SIZE);
 285
 286    /* Don't compress if it won't fit in the # of bits we have. */
 287    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
 288        return;
 289    }
 290
 291    lp->ptr = p[valid_ptr].ptr;
 292    if (!p[valid_ptr].skip) {
 293        /* If our only child is a leaf, make this a leaf. */
 294        /* By design, we should have made this node a leaf to begin with so we
 295         * should never reach here.
 296         * But since it's so simple to handle this, let's do it just in case we
 297         * change this rule.
 298         */
 299        lp->skip = 0;
 300    } else {
 301        lp->skip += p[valid_ptr].skip;
 302    }
 303}
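/* The net effect is that a chain of nodes with a single populated slot
 * collapses into one entry whose skip field spans several levels, letting
 * phys_page_find() below descend the tree in fewer steps.
 */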
 304
 305static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
 306{
 307    DECLARE_BITMAP(compacted, nodes_nb);
 308
 309    if (d->phys_map.skip) {
 310        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
 311    }
 312}
 313
 314static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
 315                                           Node *nodes, MemoryRegionSection *sections)
 316{
 317    PhysPageEntry *p;
 318    hwaddr index = addr >> TARGET_PAGE_BITS;
 319    int i;
 320
 321    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
 322        if (lp.ptr == PHYS_MAP_NODE_NIL) {
 323            return &sections[PHYS_SECTION_UNASSIGNED];
 324        }
 325        p = nodes[lp.ptr];
 326        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
 327    }
 328
 329    if (sections[lp.ptr].size.hi ||
 330        range_covers_byte(sections[lp.ptr].offset_within_address_space,
 331                          sections[lp.ptr].size.lo, addr)) {
 332        return &sections[lp.ptr];
 333    } else {
 334        return &sections[PHYS_SECTION_UNASSIGNED];
 335    }
 336}
 337
 338bool memory_region_is_unassigned(MemoryRegion *mr)
 339{
 340    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
 341        && mr != &io_mem_watch;
 342}
 343
 344/* Called from RCU critical section */
 345static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
 346                                                        hwaddr addr,
 347                                                        bool resolve_subpage)
 348{
 349    MemoryRegionSection *section;
 350    subpage_t *subpage;
 351
 352    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
 353    if (resolve_subpage && section->mr->subpage) {
 354        subpage = container_of(section->mr, subpage_t, iomem);
 355        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
 356    }
 357    return section;
 358}
 359
 360/* Called from RCU critical section */
 361static MemoryRegionSection *
 362address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
 363                                 hwaddr *plen, bool resolve_subpage)
 364{
 365    MemoryRegionSection *section;
 366    MemoryRegion *mr;
 367    Int128 diff;
 368
 369    section = address_space_lookup_region(d, addr, resolve_subpage);
 370    /* Compute offset within MemoryRegionSection */
 371    addr -= section->offset_within_address_space;
 372
 373    /* Compute offset within MemoryRegion */
 374    *xlat = addr + section->offset_within_region;
 375
 376    mr = section->mr;
 377
 378    /* MMIO registers can be expected to perform full-width accesses based only
 379     * on their address, without considering adjacent registers that could
 380     * decode to completely different MemoryRegions.  When such registers
 381     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
 382     * regions overlap wildly.  For this reason we cannot clamp the accesses
 383     * here.
 384     *
 385     * If the length is small (as is the case for address_space_ldl/stl),
 386     * everything works fine.  If the incoming length is large, however,
 387     * the caller really has to do the clamping through memory_access_size.
 388     */
 389    if (memory_region_is_ram(mr)) {
 390        diff = int128_sub(section->size, int128_make64(addr));
 391        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
 392    }
 393    return section;
 394}
 395
 396static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 397{
 398    if (memory_region_is_ram(mr)) {
 399        return !(is_write && mr->readonly);
 400    }
 401    if (memory_region_is_romd(mr)) {
 402        return !is_write;
 403    }
 404
 405    return false;
 406}
 407
 408/* Called from RCU critical section */
 409MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 410                                      hwaddr *xlat, hwaddr *plen,
 411                                      bool is_write)
 412{
 413    IOMMUTLBEntry iotlb;
 414    MemoryRegionSection *section;
 415    MemoryRegion *mr;
 416
 417    for (;;) {
 418        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
 419        section = address_space_translate_internal(d, addr, &addr, plen, true);
 420        mr = section->mr;
 421
 422        if (!mr->iommu_ops) {
 423            break;
 424        }
 425
 426        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 427        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 428                | (addr & iotlb.addr_mask));
 429        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
 430        if (!(iotlb.perm & (1 << is_write))) {
 431            mr = &io_mem_unassigned;
 432            break;
 433        }
 434
 435        as = iotlb.target_as;
 436    }
 437
 438    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
 439        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
 440        *plen = MIN(page, *plen);
 441    }
 442
 443    *xlat = addr;
 444    return mr;
 445}
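/* Callers are expected to wrap the translation and the access that follows
 * in an RCU critical section, roughly:
 *
 *     rcu_read_lock();
 *     mr = address_space_translate(as, addr, &addr1, &l, is_write);
 *     ... access at most l bytes of mr starting at offset addr1 ...
 *     rcu_read_unlock();
 */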
 446
 447/* Called from RCU critical section */
 448MemoryRegionSection *
 449address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
 450                                  hwaddr *xlat, hwaddr *plen)
 451{
 452    MemoryRegionSection *section;
 453    section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
 454                                               addr, xlat, plen, false);
 455
 456    assert(!section->mr->iommu_ops);
 457    return section;
 458}
 459#endif
 460
 461#if !defined(CONFIG_USER_ONLY)
 462
 463static int cpu_common_post_load(void *opaque, int version_id)
 464{
 465    CPUState *cpu = opaque;
 466
 467    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
 468       version_id is increased. */
 469    cpu->interrupt_request &= ~0x01;
 470    tlb_flush(cpu, 1);
 471
 472    return 0;
 473}
 474
 475static int cpu_common_pre_load(void *opaque)
 476{
 477    CPUState *cpu = opaque;
 478
 479    cpu->exception_index = -1;
 480
 481    return 0;
 482}
 483
 484static bool cpu_common_exception_index_needed(void *opaque)
 485{
 486    CPUState *cpu = opaque;
 487
 488    return tcg_enabled() && cpu->exception_index != -1;
 489}
 490
 491static const VMStateDescription vmstate_cpu_common_exception_index = {
 492    .name = "cpu_common/exception_index",
 493    .version_id = 1,
 494    .minimum_version_id = 1,
 495    .needed = cpu_common_exception_index_needed,
 496    .fields = (VMStateField[]) {
 497        VMSTATE_INT32(exception_index, CPUState),
 498        VMSTATE_END_OF_LIST()
 499    }
 500};
 501
 502static bool cpu_common_crash_occurred_needed(void *opaque)
 503{
 504    CPUState *cpu = opaque;
 505
 506    return cpu->crash_occurred;
 507}
 508
 509static const VMStateDescription vmstate_cpu_common_crash_occurred = {
 510    .name = "cpu_common/crash_occurred",
 511    .version_id = 1,
 512    .minimum_version_id = 1,
 513    .needed = cpu_common_crash_occurred_needed,
 514    .fields = (VMStateField[]) {
 515        VMSTATE_BOOL(crash_occurred, CPUState),
 516        VMSTATE_END_OF_LIST()
 517    }
 518};
 519
 520const VMStateDescription vmstate_cpu_common = {
 521    .name = "cpu_common",
 522    .version_id = 1,
 523    .minimum_version_id = 1,
 524    .pre_load = cpu_common_pre_load,
 525    .post_load = cpu_common_post_load,
 526    .fields = (VMStateField[]) {
 527        VMSTATE_UINT32(halted, CPUState),
 528        VMSTATE_UINT32(interrupt_request, CPUState),
 529        VMSTATE_END_OF_LIST()
 530    },
 531    .subsections = (const VMStateDescription*[]) {
 532        &vmstate_cpu_common_exception_index,
 533        &vmstate_cpu_common_crash_occurred,
 534        NULL
 535    }
 536};
 537
 538#endif
 539
 540CPUState *qemu_get_cpu(int index)
 541{
 542    CPUState *cpu;
 543
 544    CPU_FOREACH(cpu) {
 545        if (cpu->cpu_index == index) {
 546            return cpu;
 547        }
 548    }
 549
 550    return NULL;
 551}
 552
 553#if !defined(CONFIG_USER_ONLY)
 554void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
 555{
 556    /* We only support one address space per cpu at the moment.  */
 557    assert(cpu->as == as);
 558
 559    if (cpu->cpu_ases) {
 560        /* We've already registered the listener for our only AS */
 561        return;
 562    }
 563
 564    cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
 565    cpu->cpu_ases[0].cpu = cpu;
 566    cpu->cpu_ases[0].as = as;
 567    cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
 568    memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
 569}
 570#endif
 571
 572#ifndef CONFIG_USER_ONLY
 573static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
 574
 575static int cpu_get_free_index(Error **errp)
 576{
 577    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
 578
 579    if (cpu >= MAX_CPUMASK_BITS) {
 580        error_setg(errp, "Trying to use more CPUs than max of %d",
 581                   MAX_CPUMASK_BITS);
 582        return -1;
 583    }
 584
 585    bitmap_set(cpu_index_map, cpu, 1);
 586    return cpu;
 587}
 588
 589void cpu_exec_exit(CPUState *cpu)
 590{
 591    if (cpu->cpu_index == -1) {
 592        /* cpu_index was never allocated by this @cpu or was already freed. */
 593        return;
 594    }
 595
 596    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
 597    cpu->cpu_index = -1;
 598}
 599#else
 600
 601static int cpu_get_free_index(Error **errp)
 602{
 603    CPUState *some_cpu;
 604    int cpu_index = 0;
 605
 606    CPU_FOREACH(some_cpu) {
 607        cpu_index++;
 608    }
 609    return cpu_index;
 610}
 611
 612void cpu_exec_exit(CPUState *cpu)
 613{
 614}
 615#endif
 616
 617void cpu_exec_init(CPUState *cpu, Error **errp)
 618{
 619    CPUClass *cc = CPU_GET_CLASS(cpu);
 620    int cpu_index;
 621    Error *local_err = NULL;
 622
 623#ifndef CONFIG_USER_ONLY
 624    cpu->as = &address_space_memory;
 625    cpu->thread_id = qemu_get_thread_id();
 626#endif
 627
 628#if defined(CONFIG_USER_ONLY)
 629    cpu_list_lock();
 630#endif
 631    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
 632    if (local_err) {
 633        error_propagate(errp, local_err);
 634#if defined(CONFIG_USER_ONLY)
 635        cpu_list_unlock();
 636#endif
 637        return;
 638    }
 639    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
 640#if defined(CONFIG_USER_ONLY)
 641    cpu_list_unlock();
 642#endif
 643    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
 644        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
 645    }
 646#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
 647    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
 648                    cpu_save, cpu_load, cpu->env_ptr);
 649    assert(cc->vmsd == NULL);
 650    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
 651#endif
 652    if (cc->vmsd != NULL) {
 653        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
 654    }
 655}
 656
 657#if defined(CONFIG_USER_ONLY)
 658static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 659{
 660    tb_invalidate_phys_page_range(pc, pc + 1, 0);
 661}
 662#else
 663static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 664{
 665    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
 666    if (phys != -1) {
 667        tb_invalidate_phys_addr(cpu->as,
 668                                phys | (pc & ~TARGET_PAGE_MASK));
 669    }
 670}
 671#endif
 672
 673#if defined(CONFIG_USER_ONLY)
 674void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 675
 676{
 677}
 678
 679int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 680                          int flags)
 681{
 682    return -ENOSYS;
 683}
 684
 685void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 686{
 687}
 688
 689int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 690                          int flags, CPUWatchpoint **watchpoint)
 691{
 692    return -ENOSYS;
 693}
 694#else
 695/* Add a watchpoint.  */
 696int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 697                          int flags, CPUWatchpoint **watchpoint)
 698{
 699    CPUWatchpoint *wp;
 700
 701    /* forbid ranges which are empty or run off the end of the address space */
 702    if (len == 0 || (addr + len - 1) < addr) {
 703        error_report("tried to set invalid watchpoint at %"
 704                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
 705        return -EINVAL;
 706    }
 707    wp = g_malloc(sizeof(*wp));
 708
 709    wp->vaddr = addr;
 710    wp->len = len;
 711    wp->flags = flags;
 712
 713    /* keep all GDB-injected watchpoints in front */
 714    if (flags & BP_GDB) {
 715        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
 716    } else {
 717        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
 718    }
 719
 720    tlb_flush_page(cpu, addr);
 721
 722    if (watchpoint)
 723        *watchpoint = wp;
 724    return 0;
 725}
 726
 727/* Remove a specific watchpoint.  */
 728int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
 729                          int flags)
 730{
 731    CPUWatchpoint *wp;
 732
 733    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 734        if (addr == wp->vaddr && len == wp->len
 735                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
 736            cpu_watchpoint_remove_by_ref(cpu, wp);
 737            return 0;
 738        }
 739    }
 740    return -ENOENT;
 741}
 742
 743/* Remove a specific watchpoint by reference.  */
 744void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 745{
 746    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
 747
 748    tlb_flush_page(cpu, watchpoint->vaddr);
 749
 750    g_free(watchpoint);
 751}
 752
 753/* Remove all matching watchpoints.  */
 754void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 755{
 756    CPUWatchpoint *wp, *next;
 757
 758    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
 759        if (wp->flags & mask) {
 760            cpu_watchpoint_remove_by_ref(cpu, wp);
 761        }
 762    }
 763}
 764
 765/* Return true if this watchpoint address matches the specified
 766 * access (ie the address range covered by the watchpoint overlaps
 767 * partially or completely with the address range covered by the
 768 * access).
 769 */
 770static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 771                                                  vaddr addr,
 772                                                  vaddr len)
 773{
 774    /* We know the lengths are non-zero, but a little caution is
 775     * required to avoid errors in the case where the range ends
 776     * exactly at the top of the address space and so addr + len
 777     * wraps round to zero.
 778     */
 779    vaddr wpend = wp->vaddr + wp->len - 1;
 780    vaddr addrend = addr + len - 1;
 781
 782    return !(addr > wpend || wp->vaddr > addrend);
 783}
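/* For example, a watchpoint whose range ends at the very top of the address
 * space has wp->vaddr + wp->len wrap to zero, but wpend above is simply the
 * all-ones address, so the overlap test still gives the right answer.
 */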
 784
 785#endif
 786
 787/* Add a breakpoint.  */
 788int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
 789                          CPUBreakpoint **breakpoint)
 790{
 791    CPUBreakpoint *bp;
 792
 793    bp = g_malloc(sizeof(*bp));
 794
 795    bp->pc = pc;
 796    bp->flags = flags;
 797
 798    /* keep all GDB-injected breakpoints in front */
 799    if (flags & BP_GDB) {
 800        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
 801    } else {
 802        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
 803    }
 804
 805    breakpoint_invalidate(cpu, pc);
 806
 807    if (breakpoint) {
 808        *breakpoint = bp;
 809    }
 810    return 0;
 811}
 812
 813/* Remove a specific breakpoint.  */
 814int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 815{
 816    CPUBreakpoint *bp;
 817
 818    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 819        if (bp->pc == pc && bp->flags == flags) {
 820            cpu_breakpoint_remove_by_ref(cpu, bp);
 821            return 0;
 822        }
 823    }
 824    return -ENOENT;
 825}
 826
 827/* Remove a specific breakpoint by reference.  */
 828void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 829{
 830    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
 831
 832    breakpoint_invalidate(cpu, breakpoint->pc);
 833
 834    g_free(breakpoint);
 835}
 836
 837/* Remove all matching breakpoints. */
 838void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 839{
 840    CPUBreakpoint *bp, *next;
 841
 842    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
 843        if (bp->flags & mask) {
 844            cpu_breakpoint_remove_by_ref(cpu, bp);
 845        }
 846    }
 847}
 848
 849/* enable or disable single step mode. EXCP_DEBUG is returned by the
 850   CPU loop after each instruction */
 851void cpu_single_step(CPUState *cpu, int enabled)
 852{
 853    if (cpu->singlestep_enabled != enabled) {
 854        cpu->singlestep_enabled = enabled;
 855        if (kvm_enabled()) {
 856            kvm_update_guest_debug(cpu, 0);
 857        } else {
 858            /* must flush all the translated code to avoid inconsistencies */
 859            /* XXX: only flush what is necessary */
 860            tb_flush(cpu);
 861        }
 862    }
 863}
 864
 865void cpu_abort(CPUState *cpu, const char *fmt, ...)
 866{
 867    va_list ap;
 868    va_list ap2;
 869
 870    va_start(ap, fmt);
 871    va_copy(ap2, ap);
 872    fprintf(stderr, "qemu: fatal: ");
 873    vfprintf(stderr, fmt, ap);
 874    fprintf(stderr, "\n");
 875    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 876    if (qemu_log_enabled()) {
 877        qemu_log("qemu: fatal: ");
 878        qemu_log_vprintf(fmt, ap2);
 879        qemu_log("\n");
 880        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 881        qemu_log_flush();
 882        qemu_log_close();
 883    }
 884    va_end(ap2);
 885    va_end(ap);
 886    replay_finish();
 887#if defined(CONFIG_USER_ONLY)
 888    {
 889        struct sigaction act;
 890        sigfillset(&act.sa_mask);
 891        act.sa_handler = SIG_DFL;
 892        sigaction(SIGABRT, &act, NULL);
 893    }
 894#endif
 895    abort();
 896}
 897
 898#if !defined(CONFIG_USER_ONLY)
 899/* Called from RCU critical section */
 900static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 901{
 902    RAMBlock *block;
 903
 904    block = atomic_rcu_read(&ram_list.mru_block);
 905    if (block && addr - block->offset < block->max_length) {
 906        return block;
 907    }
 908    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 909        if (addr - block->offset < block->max_length) {
 910            goto found;
 911        }
 912    }
 913
 914    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
 915    abort();
 916
 917found:
 918    /* It is safe to write mru_block outside the iothread lock.  This
 919     * is what happens:
 920     *
 921     *     mru_block = xxx
 922     *     rcu_read_unlock()
 923     *                                        xxx removed from list
 924     *                  rcu_read_lock()
 925     *                  read mru_block
 926     *                                        mru_block = NULL;
 927     *                                        call_rcu(reclaim_ramblock, xxx);
 928     *                  rcu_read_unlock()
 929     *
 930     * atomic_rcu_set is not needed here.  The block was already published
 931     * when it was placed into the list.  Here we're just making an extra
 932     * copy of the pointer.
 933     */
 934    ram_list.mru_block = block;
 935    return block;
 936}
 937
 938static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 939{
 940    CPUState *cpu;
 941    ram_addr_t start1;
 942    RAMBlock *block;
 943    ram_addr_t end;
 944
 945    end = TARGET_PAGE_ALIGN(start + length);
 946    start &= TARGET_PAGE_MASK;
 947
 948    rcu_read_lock();
 949    block = qemu_get_ram_block(start);
 950    assert(block == qemu_get_ram_block(end - 1));
 951    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
 952    CPU_FOREACH(cpu) {
 953        tlb_reset_dirty(cpu, start1, length);
 954    }
 955    rcu_read_unlock();
 956}
 957
 958/* Note: start and end must be within the same ram block.  */
 959bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
 960                                              ram_addr_t length,
 961                                              unsigned client)
 962{
 963    unsigned long end, page;
 964    bool dirty;
 965
 966    if (length == 0) {
 967        return false;
 968    }
 969
 970    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
 971    page = start >> TARGET_PAGE_BITS;
 972    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
 973                                         page, end - page);
 974
 975    if (dirty && tcg_enabled()) {
 976        tlb_reset_dirty_range_all(start, length);
 977    }
 978
 979    return dirty;
 980}
 981
 982/* Called from RCU critical section */
 983hwaddr memory_region_section_get_iotlb(CPUState *cpu,
 984                                       MemoryRegionSection *section,
 985                                       target_ulong vaddr,
 986                                       hwaddr paddr, hwaddr xlat,
 987                                       int prot,
 988                                       target_ulong *address)
 989{
 990    hwaddr iotlb;
 991    CPUWatchpoint *wp;
 992
 993    if (memory_region_is_ram(section->mr)) {
 994        /* Normal RAM.  */
 995        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
 996            + xlat;
 997        if (!section->readonly) {
 998            iotlb |= PHYS_SECTION_NOTDIRTY;
 999        } else {
1000            iotlb |= PHYS_SECTION_ROM;
1001        }
1002    } else {
1003        AddressSpaceDispatch *d;
1004
1005        d = atomic_rcu_read(&section->address_space->dispatch);
1006        iotlb = section - d->map.sections;
1007        iotlb += xlat;
1008    }
1009
1010    /* Make accesses to pages with watchpoints go via the
1011       watchpoint trap routines.  */
1012    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1013        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1014            /* Avoid trapping reads of pages with a write breakpoint. */
1015            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1016                iotlb = PHYS_SECTION_WATCH + paddr;
1017                *address |= TLB_MMIO;
1018                break;
1019            }
1020        }
1021    }
1022
1023    return iotlb;
1024}
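/* Note how the returned iotlb encodes two cases: for RAM it is the
 * page-aligned ram_addr plus xlat, with PHYS_SECTION_NOTDIRTY or
 * PHYS_SECTION_ROM ORed into the low bits, while for everything else it is
 * the section's index in d->map.sections plus xlat.
 */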
1025#endif /* defined(CONFIG_USER_ONLY) */
1026
1027#if !defined(CONFIG_USER_ONLY)
1028
1029static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1030                             uint16_t section);
1031static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1032
1033static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1034                               qemu_anon_ram_alloc;
1035
1036/*
1037 * Set a custom physical guest memory allocator.
1038 * Accelerators with unusual needs may need this.  Hopefully, we can
1039 * get rid of it eventually.
1040 */
1041void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1042{
1043    phys_mem_alloc = alloc;
1044}
1045
1046static uint16_t phys_section_add(PhysPageMap *map,
1047                                 MemoryRegionSection *section)
1048{
1049    /* The physical section number is ORed with a page-aligned
1050     * pointer to produce the iotlb entries.  Thus it should
1051     * never overflow into the page-aligned value.
1052     */
1053    assert(map->sections_nb < TARGET_PAGE_SIZE);
1054
1055    if (map->sections_nb == map->sections_nb_alloc) {
1056        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1057        map->sections = g_renew(MemoryRegionSection, map->sections,
1058                                map->sections_nb_alloc);
1059    }
1060    map->sections[map->sections_nb] = *section;
1061    memory_region_ref(section->mr);
1062    return map->sections_nb++;
1063}
1064
1065static void phys_section_destroy(MemoryRegion *mr)
1066{
1067    bool have_sub_page = mr->subpage;
1068
1069    memory_region_unref(mr);
1070
1071    if (have_sub_page) {
1072        subpage_t *subpage = container_of(mr, subpage_t, iomem);
1073        object_unref(OBJECT(&subpage->iomem));
1074        g_free(subpage);
1075    }
1076}
1077
1078static void phys_sections_free(PhysPageMap *map)
1079{
1080    while (map->sections_nb > 0) {
1081        MemoryRegionSection *section = &map->sections[--map->sections_nb];
1082        phys_section_destroy(section->mr);
1083    }
1084    g_free(map->sections);
1085    g_free(map->nodes);
1086}
1087
1088static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1089{
1090    subpage_t *subpage;
1091    hwaddr base = section->offset_within_address_space
1092        & TARGET_PAGE_MASK;
1093    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1094                                                   d->map.nodes, d->map.sections);
1095    MemoryRegionSection subsection = {
1096        .offset_within_address_space = base,
1097        .size = int128_make64(TARGET_PAGE_SIZE),
1098    };
1099    hwaddr start, end;
1100
1101    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1102
1103    if (!(existing->mr->subpage)) {
1104        subpage = subpage_init(d->as, base);
1105        subsection.address_space = d->as;
1106        subsection.mr = &subpage->iomem;
1107        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1108                      phys_section_add(&d->map, &subsection));
1109    } else {
1110        subpage = container_of(existing->mr, subpage_t, iomem);
1111    }
1112    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1113    end = start + int128_get64(section->size) - 1;
1114    subpage_register(subpage, start, end,
1115                     phys_section_add(&d->map, section));
1116}
1117
1118
1119static void register_multipage(AddressSpaceDispatch *d,
1120                               MemoryRegionSection *section)
1121{
1122    hwaddr start_addr = section->offset_within_address_space;
1123    uint16_t section_index = phys_section_add(&d->map, section);
1124    uint64_t num_pages = int128_get64(int128_rshift(section->size,
1125                                                    TARGET_PAGE_BITS));
1126
1127    assert(num_pages);
1128    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1129}
1130
1131static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1132{
1133    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1134    AddressSpaceDispatch *d = as->next_dispatch;
1135    MemoryRegionSection now = *section, remain = *section;
1136    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1137
1138    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1139        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1140                       - now.offset_within_address_space;
1141
1142        now.size = int128_min(int128_make64(left), now.size);
1143        register_subpage(d, &now);
1144    } else {
1145        now.size = int128_zero();
1146    }
1147    while (int128_ne(remain.size, now.size)) {
1148        remain.size = int128_sub(remain.size, now.size);
1149        remain.offset_within_address_space += int128_get64(now.size);
1150        remain.offset_within_region += int128_get64(now.size);
1151        now = remain;
1152        if (int128_lt(remain.size, page_size)) {
1153            register_subpage(d, &now);
1154        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1155            now.size = page_size;
1156            register_subpage(d, &now);
1157        } else {
1158            now.size = int128_and(now.size, int128_neg(page_size));
1159            register_multipage(d, &now);
1160        }
1161    }
1162}
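/* As an illustration, with 4 KiB pages a section covering 0x0800..0x30ff is
 * split by the loop above into a head subpage for 0x0800..0x0fff, a
 * multipage run for 0x1000..0x2fff and a tail subpage for 0x3000..0x30ff.
 */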
1163
1164void qemu_flush_coalesced_mmio_buffer(void)
1165{
1166    if (kvm_enabled())
1167        kvm_flush_coalesced_mmio_buffer();
1168}
1169
1170void qemu_mutex_lock_ramlist(void)
1171{
1172    qemu_mutex_lock(&ram_list.mutex);
1173}
1174
1175void qemu_mutex_unlock_ramlist(void)
1176{
1177    qemu_mutex_unlock(&ram_list.mutex);
1178}
1179
1180#ifdef __linux__
1181
1182#include <sys/vfs.h>
1183
1184#define HUGETLBFS_MAGIC       0x958458f6
1185
1186static long gethugepagesize(const char *path, Error **errp)
1187{
1188    struct statfs fs;
1189    int ret;
1190
1191    do {
1192        ret = statfs(path, &fs);
1193    } while (ret != 0 && errno == EINTR);
1194
1195    if (ret != 0) {
1196        error_setg_errno(errp, errno, "failed to get page size of file %s",
1197                         path);
1198        return 0;
1199    }
1200
1201    return fs.f_bsize;
1202}
1203
1204static void *file_ram_alloc(RAMBlock *block,
1205                            ram_addr_t memory,
1206                            const char *path,
1207                            Error **errp)
1208{
1209    struct stat st;
1210    char *filename;
1211    char *sanitized_name;
1212    char *c;
1213    void *area;
1214    int fd;
1215    uint64_t hpagesize;
1216    Error *local_err = NULL;
1217
1218    hpagesize = gethugepagesize(path, &local_err);
1219    if (local_err) {
1220        error_propagate(errp, local_err);
1221        goto error;
1222    }
1223    block->mr->align = hpagesize;
1224
1225    if (memory < hpagesize) {
1226        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1227                   "or larger than huge page size 0x%" PRIx64,
1228                   memory, hpagesize);
1229        goto error;
1230    }
1231
1232    if (kvm_enabled() && !kvm_has_sync_mmu()) {
1233        error_setg(errp,
1234                   "host lacks kvm mmu notifiers, -mem-path unsupported");
1235        goto error;
1236    }
1237
1238    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1239        /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1240        sanitized_name = g_strdup(memory_region_name(block->mr));
1241        for (c = sanitized_name; *c != '\0'; c++) {
1242            if (*c == '/') {
1243                *c = '_';
1244            }
1245        }
1246
1247        filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1248                                   sanitized_name);
1249        g_free(sanitized_name);
1250
1251        fd = mkstemp(filename);
1252        if (fd >= 0) {
1253            unlink(filename);
1254        }
1255        g_free(filename);
1256    } else {
1257        fd = open(path, O_RDWR | O_CREAT, 0644);
1258    }
1259
1260    if (fd < 0) {
1261        error_setg_errno(errp, errno,
1262                         "unable to create backing store for hugepages");
1263        goto error;
1264    }
1265
1266    memory = ROUND_UP(memory, hpagesize);
1267
1268    /*
1269     * ftruncate is not supported by hugetlbfs in older
1270     * hosts, so don't bother bailing out on errors.
1271     * If anything goes wrong with it under other filesystems,
1272     * mmap will fail.
1273     */
1274    if (ftruncate(fd, memory)) {
1275        perror("ftruncate");
1276    }
1277
1278    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1279    if (area == MAP_FAILED) {
1280        error_setg_errno(errp, errno,
1281                         "unable to map backing store for hugepages");
1282        close(fd);
1283        goto error;
1284    }
1285
1286    if (mem_prealloc) {
1287        os_mem_prealloc(fd, area, memory);
1288    }
1289
1290    block->fd = fd;
1291    return area;
1292
1293error:
1294    return NULL;
1295}
1296#endif
1297
1298/* Called with the ramlist lock held.  */
1299static ram_addr_t find_ram_offset(ram_addr_t size)
1300{
1301    RAMBlock *block, *next_block;
1302    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1303
1304    assert(size != 0); /* it would hand out same offset multiple times */
1305
1306    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1307        return 0;
1308    }
1309
1310    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1311        ram_addr_t end, next = RAM_ADDR_MAX;
1312
1313        end = block->offset + block->max_length;
1314
1315        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1316            if (next_block->offset >= end) {
1317                next = MIN(next, next_block->offset);
1318            }
1319        }
1320        if (next - end >= size && next - end < mingap) {
1321            offset = end;
1322            mingap = next - end;
1323        }
1324    }
1325
1326    if (offset == RAM_ADDR_MAX) {
1327        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1328                (uint64_t)size);
1329        abort();
1330    }
1331
1332    return offset;
1333}
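/* E.g. with existing blocks at [0, 0x1000) and [0x3000, 0x4000), a request
 * for 0x1000 bytes returns offset 0x1000: the gap between the two blocks is
 * the smallest one that still fits the new block.
 */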
1334
1335ram_addr_t last_ram_offset(void)
1336{
1337    RAMBlock *block;
1338    ram_addr_t last = 0;
1339
1340    rcu_read_lock();
1341    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1342        last = MAX(last, block->offset + block->max_length);
1343    }
1344    rcu_read_unlock();
1345    return last;
1346}
1347
1348static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1349{
1350    int ret;
1351
1352    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1353    if (!machine_dump_guest_core(current_machine)) {
1354        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1355        if (ret) {
1356            perror("qemu_madvise");
1357            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1358                            "but dump_guest_core=off specified\n");
1359        }
1360    }
1361}
1362
1363/* Called within an RCU critical section, or while the ramlist lock
1364 * is held.
1365 */
1366static RAMBlock *find_ram_block(ram_addr_t addr)
1367{
1368    RAMBlock *block;
1369
1370    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1371        if (block->offset == addr) {
1372            return block;
1373        }
1374    }
1375
1376    return NULL;
1377}
1378
1379const char *qemu_ram_get_idstr(RAMBlock *rb)
1380{
1381    return rb->idstr;
1382}
1383
1384/* Called with iothread lock held.  */
1385void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1386{
1387    RAMBlock *new_block, *block;
1388
1389    rcu_read_lock();
1390    new_block = find_ram_block(addr);
1391    assert(new_block);
1392    assert(!new_block->idstr[0]);
1393
1394    if (dev) {
1395        char *id = qdev_get_dev_path(dev);
1396        if (id) {
1397            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1398            g_free(id);
1399        }
1400    }
1401    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1402
1403    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1404        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1405            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1406                    new_block->idstr);
1407            abort();
1408        }
1409    }
1410    rcu_read_unlock();
1411}
1412
1413/* Called with iothread lock held.  */
1414void qemu_ram_unset_idstr(ram_addr_t addr)
1415{
1416    RAMBlock *block;
1417
1418    /* FIXME: arch_init.c assumes that this is not called throughout
1419     * migration.  Ignore the problem since hot-unplug during migration
1420     * does not work anyway.
1421     */
1422
1423    rcu_read_lock();
1424    block = find_ram_block(addr);
1425    if (block) {
1426        memset(block->idstr, 0, sizeof(block->idstr));
1427    }
1428    rcu_read_unlock();
1429}
1430
1431static int memory_try_enable_merging(void *addr, size_t len)
1432{
1433    if (!machine_mem_merge(current_machine)) {
1434        /* disabled by the user */
1435        return 0;
1436    }
1437
1438    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1439}
1440
1441/* Only legal before the guest might have detected the memory size: e.g. on
1442 * incoming migration, or right after reset.
1443 *
1444 * As the memory core doesn't know how the memory is accessed, it is up to
1445 * the resize callback to update device state and/or add assertions to
1446 * detect misuse, if necessary.
1447 */
1448int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1449{
1450    RAMBlock *block = find_ram_block(base);
1451
1452    assert(block);
1453
1454    newsize = HOST_PAGE_ALIGN(newsize);
1455
1456    if (block->used_length == newsize) {
1457        return 0;
1458    }
1459
1460    if (!(block->flags & RAM_RESIZEABLE)) {
1461        error_setg_errno(errp, EINVAL,
1462                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
1463                         " in != 0x" RAM_ADDR_FMT, block->idstr,
1464                         newsize, block->used_length);
1465        return -EINVAL;
1466    }
1467
1468    if (block->max_length < newsize) {
1469        error_setg_errno(errp, EINVAL,
1470                         "Length too large: %s: 0x" RAM_ADDR_FMT
1471                         " > 0x" RAM_ADDR_FMT, block->idstr,
1472                         newsize, block->max_length);
1473        return -EINVAL;
1474    }
1475
1476    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1477    block->used_length = newsize;
1478    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1479                                        DIRTY_CLIENTS_ALL);
1480    memory_region_set_size(block->mr, newsize);
1481    if (block->resized) {
1482        block->resized(block->idstr, newsize, block->host);
1483    }
1484    return 0;
1485}
1486
1487static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1488{
1489    RAMBlock *block;
1490    RAMBlock *last_block = NULL;
1491    ram_addr_t old_ram_size, new_ram_size;
1492
1493    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1494
1495    qemu_mutex_lock_ramlist();
1496    new_block->offset = find_ram_offset(new_block->max_length);
1497
1498    if (!new_block->host) {
1499        if (xen_enabled()) {
1500            xen_ram_alloc(new_block->offset, new_block->max_length,
1501                          new_block->mr);
1502        } else {
1503            new_block->host = phys_mem_alloc(new_block->max_length,
1504                                             &new_block->mr->align);
1505            if (!new_block->host) {
1506                error_setg_errno(errp, errno,
1507                                 "cannot set up guest memory '%s'",
1508                                 memory_region_name(new_block->mr));
1509                qemu_mutex_unlock_ramlist();
1510                return -1;
1511            }
1512            memory_try_enable_merging(new_block->host, new_block->max_length);
1513        }
1514    }
1515
1516    new_ram_size = MAX(old_ram_size,
1517              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1518    if (new_ram_size > old_ram_size) {
1519        migration_bitmap_extend(old_ram_size, new_ram_size);
1520    }
1521    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
1522     * QLIST (which has an RCU-friendly variant) does not have insertion at
1523     * tail, so save the last element in last_block.
1524     */
1525    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1526        last_block = block;
1527        if (block->max_length < new_block->max_length) {
1528            break;
1529        }
1530    }
1531    if (block) {
1532        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1533    } else if (last_block) {
1534        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1535    } else { /* list is empty */
1536        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1537    }
1538    ram_list.mru_block = NULL;
1539
1540    /* Write list before version */
1541    smp_wmb();
1542    ram_list.version++;
1543    qemu_mutex_unlock_ramlist();
1544
1545    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1546
1547    if (new_ram_size > old_ram_size) {
1548        int i;
1549
1550        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
1551        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1552            ram_list.dirty_memory[i] =
1553                bitmap_zero_extend(ram_list.dirty_memory[i],
1554                                   old_ram_size, new_ram_size);
1555       }
1556    }
1557    cpu_physical_memory_set_dirty_range(new_block->offset,
1558                                        new_block->used_length,
1559                                        DIRTY_CLIENTS_ALL);
1560
1561    if (new_block->host) {
1562        qemu_ram_setup_dump(new_block->host, new_block->max_length);
1563        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1564        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1565        if (kvm_enabled()) {
1566            kvm_setup_guest_memory(new_block->host, new_block->max_length);
1567        }
1568    }
1569
1570    return new_block->offset;
1571}
1572
1573#ifdef __linux__
1574ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1575                                    bool share, const char *mem_path,
1576                                    Error **errp)
1577{
1578    RAMBlock *new_block;
1579    ram_addr_t addr;
1580    Error *local_err = NULL;
1581
1582    if (xen_enabled()) {
1583        error_setg(errp, "-mem-path not supported with Xen");
1584        return -1;
1585    }
1586
1587    if (phys_mem_alloc != qemu_anon_ram_alloc) {
1588        /*
1589         * file_ram_alloc() needs to allocate just like
1590         * phys_mem_alloc, but we haven't bothered to provide
1591         * a hook there.
1592         */
1593        error_setg(errp,
1594                   "-mem-path not supported with this accelerator");
1595        return -1;
1596    }
1597
1598    size = HOST_PAGE_ALIGN(size);
1599    new_block = g_malloc0(sizeof(*new_block));
1600    new_block->mr = mr;
1601    new_block->used_length = size;
1602    new_block->max_length = size;
1603    new_block->flags = share ? RAM_SHARED : 0;
1604    new_block->flags |= RAM_FILE;
1605    new_block->host = file_ram_alloc(new_block, size,
1606                                     mem_path, errp);
1607    if (!new_block->host) {
1608        g_free(new_block);
1609        return -1;
1610    }
1611
1612    addr = ram_block_add(new_block, &local_err);
1613    if (local_err) {
1614        g_free(new_block);
1615        error_propagate(errp, local_err);
1616        return -1;
1617    }
1618    return addr;
1619}
1620#endif
1621
1622static
1623ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1624                                   void (*resized)(const char*,
1625                                                   uint64_t length,
1626                                                   void *host),
1627                                   void *host, bool resizeable,
1628                                   MemoryRegion *mr, Error **errp)
1629{
1630    RAMBlock *new_block;
1631    ram_addr_t addr;
1632    Error *local_err = NULL;
1633
1634    size = HOST_PAGE_ALIGN(size);
1635    max_size = HOST_PAGE_ALIGN(max_size);
1636    new_block = g_malloc0(sizeof(*new_block));
1637    new_block->mr = mr;
1638    new_block->resized = resized;
1639    new_block->used_length = size;
1640    new_block->max_length = max_size;
1641    assert(max_size >= size);
1642    new_block->fd = -1;
1643    new_block->host = host;
1644    if (host) {
1645        new_block->flags |= RAM_PREALLOC;
1646    }
1647    if (resizeable) {
1648        new_block->flags |= RAM_RESIZEABLE;
1649    }
1650    addr = ram_block_add(new_block, &local_err);
1651    if (local_err) {
1652        g_free(new_block);
1653        error_propagate(errp, local_err);
1654        return -1;
1655    }
1656    return addr;
1657}
1658
1659ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1660                                   MemoryRegion *mr, Error **errp)
1661{
1662    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1663}
1664
1665ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1666{
1667    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1668}
1669
1670ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1671                                     void (*resized)(const char*,
1672                                                     uint64_t length,
1673                                                     void *host),
1674                                     MemoryRegion *mr, Error **errp)
1675{
1676    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1677}
1678
1679void qemu_ram_free_from_ptr(ram_addr_t addr)
1680{
1681    RAMBlock *block;
1682
1683    qemu_mutex_lock_ramlist();
1684    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1685        if (addr == block->offset) {
1686            QLIST_REMOVE_RCU(block, next);
1687            ram_list.mru_block = NULL;
1688            /* Write list before version */
1689            smp_wmb();
1690            ram_list.version++;
1691            g_free_rcu(block, rcu);
1692            break;
1693        }
1694    }
1695    qemu_mutex_unlock_ramlist();
1696}
1697
1698static void reclaim_ramblock(RAMBlock *block)
1699{
1700    if (block->flags & RAM_PREALLOC) {
1701        ;
1702    } else if (xen_enabled()) {
1703        xen_invalidate_map_cache_entry(block->host);
1704#ifndef _WIN32
1705    } else if (block->fd >= 0) {
1706        if (block->flags & RAM_FILE) {
1707            qemu_ram_munmap(block->host, block->max_length);
1708        } else {
1709            munmap(block->host, block->max_length);
1710        }
1711        close(block->fd);
1712#endif
1713    } else {
1714        qemu_anon_ram_free(block->host, block->max_length);
1715    }
1716    g_free(block);
1717}
1718
1719void qemu_ram_free(ram_addr_t addr)
1720{
1721    RAMBlock *block;
1722
1723    qemu_mutex_lock_ramlist();
1724    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1725        if (addr == block->offset) {
1726            QLIST_REMOVE_RCU(block, next);
1727            ram_list.mru_block = NULL;
1728            /* Write list before version */
1729            smp_wmb();
1730            ram_list.version++;
1731            call_rcu(block, reclaim_ramblock, rcu);
1732            break;
1733        }
1734    }
1735    qemu_mutex_unlock_ramlist();
1736}
1737
1738#ifndef _WIN32
1739void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1740{
1741    RAMBlock *block;
1742    ram_addr_t offset;
1743    int flags;
1744    void *area, *vaddr;
1745
1746    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1747        offset = addr - block->offset;
1748        if (offset < block->max_length) {
1749            vaddr = ramblock_ptr(block, offset);
1750            if (block->flags & RAM_PREALLOC) {
1751                ;
1752            } else if (xen_enabled()) {
1753                abort();
1754            } else {
1755                flags = MAP_FIXED;
1756                if (block->fd >= 0) {
1757                    flags |= (block->flags & RAM_SHARED ?
1758                              MAP_SHARED : MAP_PRIVATE);
1759                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1760                                flags, block->fd, offset);
1761                } else {
1762                    /*
1763                     * Remap needs to match alloc.  Accelerators that
1764                     * set phys_mem_alloc never remap.  If they did,
1765                     * we'd need a remap hook here.
1766                     */
1767                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1768
1769                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1770                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1771                                flags, -1, 0);
1772                }
1773                if (area != vaddr) {
1774                    fprintf(stderr, "Could not remap addr: "
1775                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1776                            length, addr);
1777                    exit(1);
1778                }
1779                memory_try_enable_merging(vaddr, length);
1780                qemu_ram_setup_dump(vaddr, length);
1781            }
1782        }
1783    }
1784}
1785#endif /* !_WIN32 */
1786
1787int qemu_get_ram_fd(ram_addr_t addr)
1788{
1789    RAMBlock *block;
1790    int fd;
1791
1792    rcu_read_lock();
1793    block = qemu_get_ram_block(addr);
1794    fd = block->fd;
1795    rcu_read_unlock();
1796    return fd;
1797}
1798
1799void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1800{
1801    RAMBlock *block;
1802    void *ptr;
1803
1804    rcu_read_lock();
1805    block = qemu_get_ram_block(addr);
1806    ptr = ramblock_ptr(block, 0);
1807    rcu_read_unlock();
1808    return ptr;
1809}
1810
1811/* Return a host pointer to ram allocated with qemu_ram_alloc.
1812 * This should not be used for general purpose DMA.  Use address_space_map
1813 * or address_space_rw instead. For local memory (e.g. video ram) that the
1814 * device owns, use memory_region_get_ram_ptr.
1815 *
1816 * By the time this function returns, the returned pointer is not protected
1817 * by RCU anymore.  If the caller is not within an RCU critical section and
1818 * does not hold the iothread lock, it must have other means of protecting the
1819 * pointer, such as a reference to the region that includes the incoming
1820 * ram_addr_t.
1821 */
1822void *qemu_get_ram_ptr(ram_addr_t addr)
1823{
1824    RAMBlock *block;
1825    void *ptr;
1826
1827    rcu_read_lock();
1828    block = qemu_get_ram_block(addr);
1829
1830    if (xen_enabled() && block->host == NULL) {
1831        /* The block at offset 0 covers the whole of guest RAM, which we
1832         * do not want to map in its entirety in QEMU; in that case map
1833         * only the page that contains addr.  Other blocks are mapped
1834         * completely. */
1835        if (block->offset == 0) {
1836            ptr = xen_map_cache(addr, 0, 0);
1837            goto unlock;
1838        }
1839
1840        block->host = xen_map_cache(block->offset, block->max_length, 1);
1841    }
1842    ptr = ramblock_ptr(block, addr - block->offset);
1843
1844unlock:
1845    rcu_read_unlock();
1846    return ptr;
1847}
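
    /*
     * Illustrative usage sketch (helper name and address are hypothetical):
     * a caller that relies on an RCU critical section, as described above,
     * must finish using the returned pointer before dropping the lock, or
     * keep a reference on the owning memory region instead.
     */
    static void __attribute__((unused))
    example_peek_ram_byte(ram_addr_t addr, uint8_t *out)
    {
        uint8_t *p;

        rcu_read_lock();
        p = qemu_get_ram_ptr(addr);   /* stays valid while the lock is held */
        *out = ldub_p(p);             /* use it before unlocking */
        rcu_read_unlock();
    }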
1848
1849/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1850 * but takes a size argument.
1851 *
1852 * By the time this function returns, the returned pointer is not protected
1853 * by RCU anymore.  If the caller is not within an RCU critical section and
1854 * does not hold the iothread lock, it must have other means of protecting the
1855 * pointer, such as a reference to the region that includes the incoming
1856 * ram_addr_t.
1857 */
1858static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1859{
1860    void *ptr;
1861    if (*size == 0) {
1862        return NULL;
1863    }
1864    if (xen_enabled()) {
1865        return xen_map_cache(addr, *size, 1);
1866    } else {
1867        RAMBlock *block;
1868        rcu_read_lock();
1869        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1870            if (addr - block->offset < block->max_length) {
1871                if (addr - block->offset + *size > block->max_length)
1872                    *size = block->max_length - addr + block->offset;
1873                ptr = ramblock_ptr(block, addr - block->offset);
1874                rcu_read_unlock();
1875                return ptr;
1876            }
1877        }
1878
1879        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1880        abort();
1881    }
1882}
1883
1884/*
1885 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1886 * in that RAMBlock.
1887 *
1888 * ptr: Host pointer to look up
1889 * round_offset: If true round the result offset down to a page boundary
1890 * *ram_addr: set to result ram_addr
1891 * *offset: set to result offset within the RAMBlock
1892 *
1893 * Returns: RAMBlock (or NULL if not found)
1894 *
1895 * By the time this function returns, the returned pointer is not protected
1896 * by RCU anymore.  If the caller is not within an RCU critical section and
1897 * does not hold the iothread lock, it must have other means of protecting the
1898 * pointer, such as a reference to the region that includes the incoming
1899 * ram_addr_t.
1900 */
1901RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1902                                   ram_addr_t *ram_addr,
1903                                   ram_addr_t *offset)
1904{
1905    RAMBlock *block;
1906    uint8_t *host = ptr;
1907
1908    if (xen_enabled()) {
1909        rcu_read_lock();
1910        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1911        block = qemu_get_ram_block(*ram_addr);
1912        if (block) {
1913            *offset = (host - block->host);
1914        }
1915        rcu_read_unlock();
1916        return block;
1917    }
1918
1919    rcu_read_lock();
1920    block = atomic_rcu_read(&ram_list.mru_block);
1921    if (block && block->host && host - block->host < block->max_length) {
1922        goto found;
1923    }
1924
1925    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1926        /* This case happens when the block is not mapped. */
1927        if (block->host == NULL) {
1928            continue;
1929        }
1930        if (host - block->host < block->max_length) {
1931            goto found;
1932        }
1933    }
1934
1935    rcu_read_unlock();
1936    return NULL;
1937
1938found:
1939    *offset = (host - block->host);
1940    if (round_offset) {
1941        *offset &= TARGET_PAGE_MASK;
1942    }
1943    *ram_addr = block->offset + *offset;
1944    rcu_read_unlock();
1945    return block;
1946}
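
    /*
     * Illustrative sketch (hypothetical helper): mapping a host pointer that
     * is believed to point into guest RAM back to its ram_addr_t, using the
     * lookup above.
     */
    static bool __attribute__((unused))
    example_host_to_ram_addr(void *host, ram_addr_t *addr)
    {
        RAMBlock *rb;
        ram_addr_t offset;

        rb = qemu_ram_block_from_host(host, false, addr, &offset);
        if (!rb) {
            return false;             /* not a pointer into guest RAM */
        }
        /* *addr now holds rb->offset + offset, an absolute ram_addr_t */
        return true;
    }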
1947
1948/*
1949 * Finds the named RAMBlock
1950 *
1951 * name: The name of the RAMBlock to find
1952 *
1953 * Returns: RAMBlock (or NULL if not found)
1954 */
1955RAMBlock *qemu_ram_block_by_name(const char *name)
1956{
1957    RAMBlock *block;
1958
1959    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1960        if (!strcmp(name, block->idstr)) {
1961            return block;
1962        }
1963    }
1964
1965    return NULL;
1966}
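
    /*
     * Illustrative sketch (hypothetical helper): looking up a block by the id
     * string that the migration code uses.  Whether a given name such as
     * "pc.ram" exists depends on the machine model.
     */
    static ram_addr_t __attribute__((unused))
    example_ram_block_size(const char *name)
    {
        RAMBlock *rb;
        ram_addr_t size = 0;

        rcu_read_lock();              /* the block list is RCU-protected */
        rb = qemu_ram_block_by_name(name);
        if (rb) {
            size = rb->used_length;
        }
        rcu_read_unlock();
        return size;
    }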
1967
1968/* Some of the softmmu routines need to translate from a host pointer
1969   (typically a TLB entry) back to a ram offset.  */
1970MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1971{
1972    RAMBlock *block;
1973    ram_addr_t offset; /* Not used */
1974
1975    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1976
1977    if (!block) {
1978        return NULL;
1979    }
1980
1981    return block->mr;
1982}
1983
1984static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1985                               uint64_t val, unsigned size)
1986{
1987    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1988        tb_invalidate_phys_page_fast(ram_addr, size);
1989    }
1990    switch (size) {
1991    case 1:
1992        stb_p(qemu_get_ram_ptr(ram_addr), val);
1993        break;
1994    case 2:
1995        stw_p(qemu_get_ram_ptr(ram_addr), val);
1996        break;
1997    case 4:
1998        stl_p(qemu_get_ram_ptr(ram_addr), val);
1999        break;
2000    default:
2001        abort();
2002    }
2003    /* Set both VGA and migration bits for simplicity and to remove
2004     * the notdirty callback faster.
2005     */
2006    cpu_physical_memory_set_dirty_range(ram_addr, size,
2007                                        DIRTY_CLIENTS_NOCODE);
2008    /* we remove the notdirty callback only if the code has been
2009       flushed */
2010    if (!cpu_physical_memory_is_clean(ram_addr)) {
2011        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2012    }
2013}
2014
2015static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2016                                 unsigned size, bool is_write)
2017{
2018    return is_write;
2019}
2020
2021static const MemoryRegionOps notdirty_mem_ops = {
2022    .write = notdirty_mem_write,
2023    .valid.accepts = notdirty_mem_accepts,
2024    .endianness = DEVICE_NATIVE_ENDIAN,
2025};
2026
2027/* Generate a debug exception if a watchpoint has been hit.  */
2028static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2029{
2030    CPUState *cpu = current_cpu;
2031    CPUArchState *env = cpu->env_ptr;
2032    target_ulong pc, cs_base;
2033    target_ulong vaddr;
2034    CPUWatchpoint *wp;
2035    int cpu_flags;
2036
2037    if (cpu->watchpoint_hit) {
2038        /* We re-entered the check after replacing the TB. Now raise
2039         * the debug interrupt so that it will trigger after the
2040         * current instruction. */
2041        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2042        return;
2043    }
2044    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2045    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2046        if (cpu_watchpoint_address_matches(wp, vaddr, len)
2047            && (wp->flags & flags)) {
2048            if (flags == BP_MEM_READ) {
2049                wp->flags |= BP_WATCHPOINT_HIT_READ;
2050            } else {
2051                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2052            }
2053            wp->hitaddr = vaddr;
2054            wp->hitattrs = attrs;
2055            if (!cpu->watchpoint_hit) {
2056                cpu->watchpoint_hit = wp;
2057                tb_check_watchpoint(cpu);
2058                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2059                    cpu->exception_index = EXCP_DEBUG;
2060                    cpu_loop_exit(cpu);
2061                } else {
2062                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2063                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2064                    cpu_resume_from_signal(cpu, NULL);
2065                }
2066            }
2067        } else {
2068            wp->flags &= ~BP_WATCHPOINT_HIT;
2069        }
2070    }
2071}
2072
2073/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2074   so these check for a hit then pass through to the normal out-of-line
2075   phys routines.  */
2076static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2077                                  unsigned size, MemTxAttrs attrs)
2078{
2079    MemTxResult res;
2080    uint64_t data;
2081
2082    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2083    switch (size) {
2084    case 1:
2085        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2086        break;
2087    case 2:
2088        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2089        break;
2090    case 4:
2091        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2092        break;
2093    default: abort();
2094    }
2095    *pdata = data;
2096    return res;
2097}
2098
2099static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2100                                   uint64_t val, unsigned size,
2101                                   MemTxAttrs attrs)
2102{
2103    MemTxResult res;
2104
2105    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2106    switch (size) {
2107    case 1:
2108        address_space_stb(&address_space_memory, addr, val, attrs, &res);
2109        break;
2110    case 2:
2111        address_space_stw(&address_space_memory, addr, val, attrs, &res);
2112        break;
2113    case 4:
2114        address_space_stl(&address_space_memory, addr, val, attrs, &res);
2115        break;
2116    default: abort();
2117    }
2118    return res;
2119}
2120
2121static const MemoryRegionOps watch_mem_ops = {
2122    .read_with_attrs = watch_mem_read,
2123    .write_with_attrs = watch_mem_write,
2124    .endianness = DEVICE_NATIVE_ENDIAN,
2125};
2126
2127static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2128                                unsigned len, MemTxAttrs attrs)
2129{
2130    subpage_t *subpage = opaque;
2131    uint8_t buf[8];
2132    MemTxResult res;
2133
2134#if defined(DEBUG_SUBPAGE)
2135    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2136           subpage, len, addr);
2137#endif
2138    res = address_space_read(subpage->as, addr + subpage->base,
2139                             attrs, buf, len);
2140    if (res) {
2141        return res;
2142    }
2143    switch (len) {
2144    case 1:
2145        *data = ldub_p(buf);
2146        return MEMTX_OK;
2147    case 2:
2148        *data = lduw_p(buf);
2149        return MEMTX_OK;
2150    case 4:
2151        *data = ldl_p(buf);
2152        return MEMTX_OK;
2153    case 8:
2154        *data = ldq_p(buf);
2155        return MEMTX_OK;
2156    default:
2157        abort();
2158    }
2159}
2160
2161static MemTxResult subpage_write(void *opaque, hwaddr addr,
2162                                 uint64_t value, unsigned len, MemTxAttrs attrs)
2163{
2164    subpage_t *subpage = opaque;
2165    uint8_t buf[8];
2166
2167#if defined(DEBUG_SUBPAGE)
2168    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2169           " value %"PRIx64"\n",
2170           __func__, subpage, len, addr, value);
2171#endif
2172    switch (len) {
2173    case 1:
2174        stb_p(buf, value);
2175        break;
2176    case 2:
2177        stw_p(buf, value);
2178        break;
2179    case 4:
2180        stl_p(buf, value);
2181        break;
2182    case 8:
2183        stq_p(buf, value);
2184        break;
2185    default:
2186        abort();
2187    }
2188    return address_space_write(subpage->as, addr + subpage->base,
2189                               attrs, buf, len);
2190}
2191
2192static bool subpage_accepts(void *opaque, hwaddr addr,
2193                            unsigned len, bool is_write)
2194{
2195    subpage_t *subpage = opaque;
2196#if defined(DEBUG_SUBPAGE)
2197    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2198           __func__, subpage, is_write ? 'w' : 'r', len, addr);
2199#endif
2200
2201    return address_space_access_valid(subpage->as, addr + subpage->base,
2202                                      len, is_write);
2203}
2204
2205static const MemoryRegionOps subpage_ops = {
2206    .read_with_attrs = subpage_read,
2207    .write_with_attrs = subpage_write,
2208    .impl.min_access_size = 1,
2209    .impl.max_access_size = 8,
2210    .valid.min_access_size = 1,
2211    .valid.max_access_size = 8,
2212    .valid.accepts = subpage_accepts,
2213    .endianness = DEVICE_NATIVE_ENDIAN,
2214};
2215
2216static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2217                             uint16_t section)
2218{
2219    int idx, eidx;
2220
2221    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2222        return -1;
2223    idx = SUBPAGE_IDX(start);
2224    eidx = SUBPAGE_IDX(end);
2225#if defined(DEBUG_SUBPAGE)
2226    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2227           __func__, mmio, start, end, idx, eidx, section);
2228#endif
2229    for (; idx <= eidx; idx++) {
2230        mmio->sub_section[idx] = section;
2231    }
2232
2233    return 0;
2234}
2235
2236static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2237{
2238    subpage_t *mmio;
2239
2240    mmio = g_malloc0(sizeof(subpage_t));
2241
2242    mmio->as = as;
2243    mmio->base = base;
2244    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2245                          NULL, TARGET_PAGE_SIZE);
2246    mmio->iomem.subpage = true;
2247#if defined(DEBUG_SUBPAGE)
2248    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2249           mmio, base, TARGET_PAGE_SIZE);
2250#endif
2251    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2252
2253    return mmio;
2254}
2255
2256static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2257                              MemoryRegion *mr)
2258{
2259    assert(as);
2260    MemoryRegionSection section = {
2261        .address_space = as,
2262        .mr = mr,
2263        .offset_within_address_space = 0,
2264        .offset_within_region = 0,
2265        .size = int128_2_64(),
2266    };
2267
2268    return phys_section_add(map, &section);
2269}
2270
2271MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2272{
2273    CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2274    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2275    MemoryRegionSection *sections = d->map.sections;
2276
2277    return sections[index & ~TARGET_PAGE_MASK].mr;
2278}
2279
2280static void io_mem_init(void)
2281{
2282    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2283    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2284                          NULL, UINT64_MAX);
2285    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2286                          NULL, UINT64_MAX);
2287    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2288                          NULL, UINT64_MAX);
2289}
2290
2291static void mem_begin(MemoryListener *listener)
2292{
2293    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2294    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2295    uint16_t n;
2296
2297    n = dummy_section(&d->map, as, &io_mem_unassigned);
2298    assert(n == PHYS_SECTION_UNASSIGNED);
2299    n = dummy_section(&d->map, as, &io_mem_notdirty);
2300    assert(n == PHYS_SECTION_NOTDIRTY);
2301    n = dummy_section(&d->map, as, &io_mem_rom);
2302    assert(n == PHYS_SECTION_ROM);
2303    n = dummy_section(&d->map, as, &io_mem_watch);
2304    assert(n == PHYS_SECTION_WATCH);
2305
2306    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2307    d->as = as;
2308    as->next_dispatch = d;
2309}
2310
2311static void address_space_dispatch_free(AddressSpaceDispatch *d)
2312{
2313    phys_sections_free(&d->map);
2314    g_free(d);
2315}
2316
2317static void mem_commit(MemoryListener *listener)
2318{
2319    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2320    AddressSpaceDispatch *cur = as->dispatch;
2321    AddressSpaceDispatch *next = as->next_dispatch;
2322
2323    phys_page_compact_all(next, next->map.nodes_nb);
2324
2325    atomic_rcu_set(&as->dispatch, next);
2326    if (cur) {
2327        call_rcu(cur, address_space_dispatch_free, rcu);
2328    }
2329}
2330
2331static void tcg_commit(MemoryListener *listener)
2332{
2333    CPUAddressSpace *cpuas;
2334    AddressSpaceDispatch *d;
2335
2336    /* since each CPU stores ram addresses in its TLB cache, we must
2337       reset the modified entries */
2338    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2339    cpu_reloading_memory_map();
2340    /* The CPU and TLB are protected by the iothread lock.
2341     * We reload the dispatch pointer now because cpu_reloading_memory_map()
2342     * may have split the RCU critical section.
2343     */
2344    d = atomic_rcu_read(&cpuas->as->dispatch);
2345    cpuas->memory_dispatch = d;
2346    tlb_flush(cpuas->cpu, 1);
2347}
2348
2349void address_space_init_dispatch(AddressSpace *as)
2350{
2351    as->dispatch = NULL;
2352    as->dispatch_listener = (MemoryListener) {
2353        .begin = mem_begin,
2354        .commit = mem_commit,
2355        .region_add = mem_add,
2356        .region_nop = mem_add,
2357        .priority = 0,
2358    };
2359    memory_listener_register(&as->dispatch_listener, as);
2360}
2361
2362void address_space_unregister(AddressSpace *as)
2363{
2364    memory_listener_unregister(&as->dispatch_listener);
2365}
2366
2367void address_space_destroy_dispatch(AddressSpace *as)
2368{
2369    AddressSpaceDispatch *d = as->dispatch;
2370
2371    atomic_rcu_set(&as->dispatch, NULL);
2372    if (d) {
2373        call_rcu(d, address_space_dispatch_free, rcu);
2374    }
2375}
2376
2377static void memory_map_init(void)
2378{
2379    system_memory = g_malloc(sizeof(*system_memory));
2380
2381    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2382    address_space_init(&address_space_memory, system_memory, "memory");
2383
2384    system_io = g_malloc(sizeof(*system_io));
2385    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2386                          65536);
2387    address_space_init(&address_space_io, system_io, "I/O");
2388}
2389
2390MemoryRegion *get_system_memory(void)
2391{
2392    return system_memory;
2393}
2394
2395MemoryRegion *get_system_io(void)
2396{
2397    return system_io;
2398}
2399
2400#endif /* !defined(CONFIG_USER_ONLY) */
2401
2402/* physical memory access (slow version, mainly for debug) */
2403#if defined(CONFIG_USER_ONLY)
2404int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2405                        uint8_t *buf, int len, int is_write)
2406{
2407    int l, flags;
2408    target_ulong page;
2409    void *p;
2410
2411    while (len > 0) {
2412        page = addr & TARGET_PAGE_MASK;
2413        l = (page + TARGET_PAGE_SIZE) - addr;
2414        if (l > len)
2415            l = len;
2416        flags = page_get_flags(page);
2417        if (!(flags & PAGE_VALID))
2418            return -1;
2419        if (is_write) {
2420            if (!(flags & PAGE_WRITE))
2421                return -1;
2422            /* XXX: this code should not depend on lock_user */
2423            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2424                return -1;
2425            memcpy(p, buf, l);
2426            unlock_user(p, addr, l);
2427        } else {
2428            if (!(flags & PAGE_READ))
2429                return -1;
2430            /* XXX: this code should not depend on lock_user */
2431            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2432                return -1;
2433            memcpy(buf, p, l);
2434            unlock_user(p, addr, 0);
2435        }
2436        len -= l;
2437        buf += l;
2438        addr += l;
2439    }
2440    return 0;
2441}
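
    /*
     * Illustrative sketch (hypothetical wrapper): how a debugger stub might
     * use cpu_memory_rw_debug() to read guest memory, treating a negative
     * return value as "page unmapped or lacking the required permission".
     */
    static bool __attribute__((unused))
    example_debug_read(CPUState *cpu, target_ulong addr, void *buf, int len)
    {
        /* Final argument 0 selects a read; non-zero would select a write. */
        return cpu_memory_rw_debug(cpu, addr, buf, len, 0) == 0;
    }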
2442
2443#else
2444
2445static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2446                                     hwaddr length)
2447{
2448    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2449    /* No early return if dirty_log_mask is or becomes 0, because
2450     * cpu_physical_memory_set_dirty_range will still call
2451     * xen_modified_memory.
2452     */
2453    if (dirty_log_mask) {
2454        dirty_log_mask =
2455            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2456    }
2457    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2458        tb_invalidate_phys_range(addr, addr + length);
2459        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2460    }
2461    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2462}
2463
2464static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2465{
2466    unsigned access_size_max = mr->ops->valid.max_access_size;
2467
2468    /* Regions are assumed to support 1-4 byte accesses unless
2469       otherwise specified.  */
2470    if (access_size_max == 0) {
2471        access_size_max = 4;
2472    }
2473
2474    /* Bound the maximum access by the alignment of the address.  */
2475    if (!mr->ops->impl.unaligned) {
2476        unsigned align_size_max = addr & -addr;
2477        if (align_size_max != 0 && align_size_max < access_size_max) {
2478            access_size_max = align_size_max;
2479        }
2480    }
2481
2482    /* Don't attempt accesses larger than the maximum.  */
2483    if (l > access_size_max) {
2484        l = access_size_max;
2485    }
2486    l = pow2floor(l);
2487
2488    return l;
2489}
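
    /*
     * Worked example of the sizing rule above (values are hypothetical): an
     * 8-byte transfer at an address aligned only to 2 bytes, on a region
     * whose ops allow at most 4-byte accesses, is started with a 2-byte
     * access; once the address becomes 4-aligned, 4-byte accesses follow.
     */
    static unsigned __attribute__((unused))
    example_first_access_size(void)
    {
        unsigned max = 4;              /* mr->ops->valid.max_access_size */
        hwaddr addr = 0x1002;          /* aligned to 2 bytes only */
        unsigned l = 8;                /* bytes the caller wants to move */
        unsigned align = addr & -addr; /* 2: largest power of 2 dividing addr */

        if (align != 0 && align < max) {
            max = align;               /* bounded by alignment: max = 2 */
        }
        if (l > max) {
            l = max;
        }
        return pow2floor(l);           /* 2 */
    }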
2490
2491static bool prepare_mmio_access(MemoryRegion *mr)
2492{
2493    bool unlocked = !qemu_mutex_iothread_locked();
2494    bool release_lock = false;
2495
2496    if (unlocked && mr->global_locking) {
2497        qemu_mutex_lock_iothread();
2498        unlocked = false;
2499        release_lock = true;
2500    }
2501    if (mr->flush_coalesced_mmio) {
2502        if (unlocked) {
2503            qemu_mutex_lock_iothread();
2504        }
2505        qemu_flush_coalesced_mmio_buffer();
2506        if (unlocked) {
2507            qemu_mutex_unlock_iothread();
2508        }
2509    }
2510
2511    return release_lock;
2512}
2513
2514MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2515                             uint8_t *buf, int len, bool is_write)
2516{
2517    hwaddr l;
2518    uint8_t *ptr;
2519    uint64_t val;
2520    hwaddr addr1;
2521    MemoryRegion *mr;
2522    MemTxResult result = MEMTX_OK;
2523    bool release_lock = false;
2524
2525    rcu_read_lock();
2526    while (len > 0) {
2527        l = len;
2528        mr = address_space_translate(as, addr, &addr1, &l, is_write);
2529
2530        if (is_write) {
2531            if (!memory_access_is_direct(mr, is_write)) {
2532                release_lock |= prepare_mmio_access(mr);
2533                l = memory_access_size(mr, l, addr1);
2534                /* XXX: could force current_cpu to NULL to avoid
2535                   potential bugs */
2536                switch (l) {
2537                case 8:
2538                    /* 64 bit write access */
2539                    val = ldq_p(buf);
2540                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
2541                                                           attrs);
2542                    break;
2543                case 4:
2544                    /* 32 bit write access */
2545                    val = ldl_p(buf);
2546                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
2547                                                           attrs);
2548                    break;
2549                case 2:
2550                    /* 16 bit write access */
2551                    val = lduw_p(buf);
2552                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
2553                                                           attrs);
2554                    break;
2555                case 1:
2556                    /* 8 bit write access */
2557                    val = ldub_p(buf);
2558                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
2559                                                           attrs);
2560                    break;
2561                default:
2562                    abort();
2563                }
2564            } else {
2565                addr1 += memory_region_get_ram_addr(mr);
2566                /* RAM case */
2567                ptr = qemu_get_ram_ptr(addr1);
2568                memcpy(ptr, buf, l);
2569                invalidate_and_set_dirty(mr, addr1, l);
2570            }
2571        } else {
2572            if (!memory_access_is_direct(mr, is_write)) {
2573                /* I/O case */
2574                release_lock |= prepare_mmio_access(mr);
2575                l = memory_access_size(mr, l, addr1);
2576                switch (l) {
2577                case 8:
2578                    /* 64 bit read access */
2579                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2580                                                          attrs);
2581                    stq_p(buf, val);
2582                    break;
2583                case 4:
2584                    /* 32 bit read access */
2585                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2586                                                          attrs);
2587                    stl_p(buf, val);
2588                    break;
2589                case 2:
2590                    /* 16 bit read access */
2591                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2592                                                          attrs);
2593                    stw_p(buf, val);
2594                    break;
2595                case 1:
2596                    /* 8 bit read access */
2597                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2598                                                          attrs);
2599                    stb_p(buf, val);
2600                    break;
2601                default:
2602                    abort();
2603                }
2604            } else {
2605                /* RAM case */
2606                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2607                memcpy(buf, ptr, l);
2608            }
2609        }
2610
2611        if (release_lock) {
2612            qemu_mutex_unlock_iothread();
2613            release_lock = false;
2614        }
2615
2616        len -= l;
2617        buf += l;
2618        addr += l;
2619    }
2620    rcu_read_unlock();
2621
2622    return result;
2623}
2624
2625MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2626                                const uint8_t *buf, int len)
2627{
2628    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2629}
2630
2631MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2632                               uint8_t *buf, int len)
2633{
2634    return address_space_rw(as, addr, attrs, buf, len, false);
2635}
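
    /*
     * Illustrative sketch (hypothetical helper): a checked read of
     * guest-physical memory through the system address space.  Any result
     * other than MEMTX_OK means the access hit a hole or a device that
     * reported an error.
     */
    static bool __attribute__((unused))
    example_read_phys(hwaddr addr, void *buf, int len)
    {
        MemTxResult r;

        r = address_space_read(&address_space_memory, addr,
                               MEMTXATTRS_UNSPECIFIED, buf, len);
        return r == MEMTX_OK;
    }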
2636
2637
2638void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2639                            int len, int is_write)
2640{
2641    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2642                     buf, len, is_write);
2643}
2644
2645enum write_rom_type {
2646    WRITE_DATA,
2647    FLUSH_CACHE,
2648};
2649
2650static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2651    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2652{
2653    hwaddr l;
2654    uint8_t *ptr;
2655    hwaddr addr1;
2656    MemoryRegion *mr;
2657
2658    rcu_read_lock();
2659    while (len > 0) {
2660        l = len;
2661        mr = address_space_translate(as, addr, &addr1, &l, true);
2662
2663        if (!(memory_region_is_ram(mr) ||
2664              memory_region_is_romd(mr))) {
2665            l = memory_access_size(mr, l, addr1);
2666        } else {
2667            addr1 += memory_region_get_ram_addr(mr);
2668            /* ROM/RAM case */
2669            ptr = qemu_get_ram_ptr(addr1);
2670            switch (type) {
2671            case WRITE_DATA:
2672                memcpy(ptr, buf, l);
2673                invalidate_and_set_dirty(mr, addr1, l);
2674                break;
2675            case FLUSH_CACHE:
2676                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2677                break;
2678            }
2679        }
2680        len -= l;
2681        buf += l;
2682        addr += l;
2683    }
2684    rcu_read_unlock();
2685}
2686
2687/* used for ROM loading: can write in RAM and ROM */
2688void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2689                                   const uint8_t *buf, int len)
2690{
2691    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2692}
2693
2694void cpu_flush_icache_range(hwaddr start, int len)
2695{
2696    /*
2697     * This function should do the same thing as an icache flush that was
2698     * triggered from within the guest. For TCG we are always cache coherent,
2699     * so there is no need to flush anything. For KVM / Xen we need to flush
2700     * the host's instruction cache at least.
2701     */
2702    if (tcg_enabled()) {
2703        return;
2704    }
2705
2706    cpu_physical_memory_write_rom_internal(&address_space_memory,
2707                                           start, NULL, len, FLUSH_CACHE);
2708}
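
    /*
     * Illustrative sketch (hypothetical helper): how a ROM or boot-code
     * loader might combine the two helpers above, first patching the bytes
     * and then making the host instruction cache coherent for KVM/Xen.
     */
    static void __attribute__((unused))
    example_patch_guest_code(hwaddr addr, const uint8_t *code, int len)
    {
        cpu_physical_memory_write_rom(&address_space_memory, addr, code, len);
        cpu_flush_icache_range(addr, len);
    }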
2709
2710typedef struct {
2711    MemoryRegion *mr;
2712    void *buffer;
2713    hwaddr addr;
2714    hwaddr len;
2715    bool in_use;
2716} BounceBuffer;
2717
2718static BounceBuffer bounce;
2719
2720typedef struct MapClient {
2721    QEMUBH *bh;
2722    QLIST_ENTRY(MapClient) link;
2723} MapClient;
2724
2725QemuMutex map_client_list_lock;
2726static QLIST_HEAD(map_client_list, MapClient) map_client_list
2727    = QLIST_HEAD_INITIALIZER(map_client_list);
2728
2729static void cpu_unregister_map_client_do(MapClient *client)
2730{
2731    QLIST_REMOVE(client, link);
2732    g_free(client);
2733}
2734
2735static void cpu_notify_map_clients_locked(void)
2736{
2737    MapClient *client;
2738
2739    while (!QLIST_EMPTY(&map_client_list)) {
2740        client = QLIST_FIRST(&map_client_list);
2741        qemu_bh_schedule(client->bh);
2742        cpu_unregister_map_client_do(client);
2743    }
2744}
2745
2746void cpu_register_map_client(QEMUBH *bh)
2747{
2748    MapClient *client = g_malloc(sizeof(*client));
2749
2750    qemu_mutex_lock(&map_client_list_lock);
2751    client->bh = bh;
2752    QLIST_INSERT_HEAD(&map_client_list, client, link);
2753    if (!atomic_read(&bounce.in_use)) {
2754        cpu_notify_map_clients_locked();
2755    }
2756    qemu_mutex_unlock(&map_client_list_lock);
2757}
2758
2759void cpu_exec_init_all(void)
2760{
2761    qemu_mutex_init(&ram_list.mutex);
2762    io_mem_init();
2763    memory_map_init();
2764    qemu_mutex_init(&map_client_list_lock);
2765}
2766
2767void cpu_unregister_map_client(QEMUBH *bh)
2768{
2769    MapClient *client;
2770
2771    qemu_mutex_lock(&map_client_list_lock);
2772    QLIST_FOREACH(client, &map_client_list, link) {
2773        if (client->bh == bh) {
2774            cpu_unregister_map_client_do(client);
2775            break;
2776        }
2777    }
2778    qemu_mutex_unlock(&map_client_list_lock);
2779}
2780
2781static void cpu_notify_map_clients(void)
2782{
2783    qemu_mutex_lock(&map_client_list_lock);
2784    cpu_notify_map_clients_locked();
2785    qemu_mutex_unlock(&map_client_list_lock);
2786}
2787
2788bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2789{
2790    MemoryRegion *mr;
2791    hwaddr l, xlat;
2792
2793    rcu_read_lock();
2794    while (len > 0) {
2795        l = len;
2796        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2797        if (!memory_access_is_direct(mr, is_write)) {
2798            l = memory_access_size(mr, l, addr);
2799            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                    rcu_read_unlock();
2800                return false;
2801            }
2802        }
2803
2804        len -= l;
2805        addr += l;
2806    }
2807    rcu_read_unlock();
2808    return true;
2809}
2810
2811/* Map a physical memory region into a host virtual address.
2812 * May map a subset of the requested range, given by and returned in *plen.
2813 * May return NULL if resources needed to perform the mapping are exhausted.
2814 * Use only for reads OR writes - not for read-modify-write operations.
2815 * Use cpu_register_map_client() to know when retrying the map operation is
2816 * likely to succeed.
2817 */
2818void *address_space_map(AddressSpace *as,
2819                        hwaddr addr,
2820                        hwaddr *plen,
2821                        bool is_write)
2822{
2823    hwaddr len = *plen;
2824    hwaddr done = 0;
2825    hwaddr l, xlat, base;
2826    MemoryRegion *mr, *this_mr;
2827    ram_addr_t raddr;
2828
2829    if (len == 0) {
2830        return NULL;
2831    }
2832
2833    l = len;
2834    rcu_read_lock();
2835    mr = address_space_translate(as, addr, &xlat, &l, is_write);
2836
2837    if (!memory_access_is_direct(mr, is_write)) {
2838        if (atomic_xchg(&bounce.in_use, true)) {
2839            rcu_read_unlock();
2840            return NULL;
2841        }
2842        /* Avoid unbounded allocations */
2843        l = MIN(l, TARGET_PAGE_SIZE);
2844        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2845        bounce.addr = addr;
2846        bounce.len = l;
2847
2848        memory_region_ref(mr);
2849        bounce.mr = mr;
2850        if (!is_write) {
2851            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2852                               bounce.buffer, l);
2853        }
2854
2855        rcu_read_unlock();
2856        *plen = l;
2857        return bounce.buffer;
2858    }
2859
2860    base = xlat;
2861    raddr = memory_region_get_ram_addr(mr);
2862
2863    for (;;) {
2864        len -= l;
2865        addr += l;
2866        done += l;
2867        if (len == 0) {
2868            break;
2869        }
2870
2871        l = len;
2872        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2873        if (this_mr != mr || xlat != base + done) {
2874            break;
2875        }
2876    }
2877
2878    memory_region_ref(mr);
2879    rcu_read_unlock();
2880    *plen = done;
2881    return qemu_ram_ptr_length(raddr + base, plen);
2882}
2883
2884/* Unmaps a memory region previously mapped by address_space_map().
2885 * Will also mark the memory as dirty if is_write == 1.  access_len gives
2886 * the amount of memory that was actually read or written by the caller.
2887 */
2888void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2889                         int is_write, hwaddr access_len)
2890{
2891    if (buffer != bounce.buffer) {
2892        MemoryRegion *mr;
2893        ram_addr_t addr1;
2894
2895        mr = qemu_ram_addr_from_host(buffer, &addr1);
2896        assert(mr != NULL);
2897        if (is_write) {
2898            invalidate_and_set_dirty(mr, addr1, access_len);
2899        }
2900        if (xen_enabled()) {
2901            xen_invalidate_map_cache_entry(buffer);
2902        }
2903        memory_region_unref(mr);
2904        return;
2905    }
2906    if (is_write) {
2907        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2908                            bounce.buffer, access_len);
2909    }
2910    qemu_vfree(bounce.buffer);
2911    bounce.buffer = NULL;
2912    memory_region_unref(bounce.mr);
2913    atomic_mb_set(&bounce.in_use, false);
2914    cpu_notify_map_clients();
2915}
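
    /*
     * Illustrative sketch of the map/unmap contract above (helper and
     * callback are hypothetical): try to map a region for DMA and, if the
     * single bounce buffer is busy, register a bottom half that will be
     * scheduled once retrying the map is likely to succeed.
     */
    static bool __attribute__((unused))
    example_try_dma_write(AddressSpace *as, hwaddr addr, hwaddr len,
                          QEMUBH *retry_bh)
    {
        hwaddr plen = len;
        void *p = address_space_map(as, addr, &plen, true);

        if (!p) {
            cpu_register_map_client(retry_bh);
            return false;
        }
        /* Only the first plen bytes are mapped; plen may be less than len. */
        memset(p, 0, plen);
        address_space_unmap(as, p, plen, true, plen);
        return true;
    }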
2916
2917void *cpu_physical_memory_map(hwaddr addr,
2918                              hwaddr *plen,
2919                              int is_write)
2920{
2921    return address_space_map(&address_space_memory, addr, plen, is_write);
2922}
2923
2924void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2925                               int is_write, hwaddr access_len)
2926{
2927    address_space_unmap(&address_space_memory, buffer, len, is_write,
                            access_len);
2928}
2929
2930/* warning: addr must be aligned */
2931static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2932                                                  MemTxAttrs attrs,
2933                                                  MemTxResult *result,
2934                                                  enum device_endian endian)
2935{
2936    uint8_t *ptr;
2937    uint64_t val;
2938    MemoryRegion *mr;
2939    hwaddr l = 4;
2940    hwaddr addr1;
2941    MemTxResult r;
2942    bool release_lock = false;
2943
2944    rcu_read_lock();
2945    mr = address_space_translate(as, addr, &addr1, &l, false);
2946    if (l < 4 || !memory_access_is_direct(mr, false)) {
2947        release_lock |= prepare_mmio_access(mr);
2948
2949        /* I/O case */
2950        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2951#if defined(TARGET_WORDS_BIGENDIAN)
2952        if (endian == DEVICE_LITTLE_ENDIAN) {
2953            val = bswap32(val);
2954        }
2955#else
2956        if (endian == DEVICE_BIG_ENDIAN) {
2957            val = bswap32(val);
2958        }
2959#endif
2960    } else {
2961        /* RAM case */
2962        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2963                                & TARGET_PAGE_MASK)
2964                               + addr1);
2965        switch (endian) {
2966        case DEVICE_LITTLE_ENDIAN:
2967            val = ldl_le_p(ptr);
2968            break;
2969        case DEVICE_BIG_ENDIAN:
2970            val = ldl_be_p(ptr);
2971            break;
2972        default:
2973            val = ldl_p(ptr);
2974            break;
2975        }
2976        r = MEMTX_OK;
2977    }
2978    if (result) {
2979        *result = r;
2980    }
2981    if (release_lock) {
2982        qemu_mutex_unlock_iothread();
2983    }
2984    rcu_read_unlock();
2985    return val;
2986}
2987
2988uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2989                           MemTxAttrs attrs, MemTxResult *result)
2990{
2991    return address_space_ldl_internal(as, addr, attrs, result,
2992                                      DEVICE_NATIVE_ENDIAN);
2993}
2994
2995uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2996                              MemTxAttrs attrs, MemTxResult *result)
2997{
2998    return address_space_ldl_internal(as, addr, attrs, result,
2999                                      DEVICE_LITTLE_ENDIAN);
3000}
3001
3002uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3003                              MemTxAttrs attrs, MemTxResult *result)
3004{
3005    return address_space_ldl_internal(as, addr, attrs, result,
3006                                      DEVICE_BIG_ENDIAN);
3007}
3008
3009uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3010{
3011    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3012}
3013
3014uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3015{
3016    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3017}
3018
3019uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3020{
3021    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3022}
3023
3024/* warning: addr must be aligned */
3025static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3026                                                  MemTxAttrs attrs,
3027                                                  MemTxResult *result,
3028                                                  enum device_endian endian)
3029{
3030    uint8_t *ptr;
3031    uint64_t val;
3032    MemoryRegion *mr;
3033    hwaddr l = 8;
3034    hwaddr addr1;
3035    MemTxResult r;
3036    bool release_lock = false;
3037
3038    rcu_read_lock();
3039    mr = address_space_translate(as, addr, &addr1, &l,
3040                                 false);
3041    if (l < 8 || !memory_access_is_direct(mr, false)) {
3042        release_lock |= prepare_mmio_access(mr);
3043
3044        /* I/O case */
3045        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3046#if defined(TARGET_WORDS_BIGENDIAN)
3047        if (endian == DEVICE_LITTLE_ENDIAN) {
3048            val = bswap64(val);
3049        }
3050#else
3051        if (endian == DEVICE_BIG_ENDIAN) {
3052            val = bswap64(val);
3053        }
3054#endif
3055    } else {
3056        /* RAM case */
3057        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3058                                & TARGET_PAGE_MASK)
3059                               + addr1);
3060        switch (endian) {
3061        case DEVICE_LITTLE_ENDIAN:
3062            val = ldq_le_p(ptr);
3063            break;
3064        case DEVICE_BIG_ENDIAN:
3065            val = ldq_be_p(ptr);
3066            break;
3067        default:
3068            val = ldq_p(ptr);
3069            break;
3070        }
3071        r = MEMTX_OK;
3072    }
3073    if (result) {
3074        *result = r;
3075    }
3076    if (release_lock) {
3077        qemu_mutex_unlock_iothread();
3078    }
3079    rcu_read_unlock();
3080    return val;
3081}
3082
3083uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3084                           MemTxAttrs attrs, MemTxResult *result)
3085{
3086    return address_space_ldq_internal(as, addr, attrs, result,
3087                                      DEVICE_NATIVE_ENDIAN);
3088}
3089
3090uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3091                           MemTxAttrs attrs, MemTxResult *result)
3092{
3093    return address_space_ldq_internal(as, addr, attrs, result,
3094                                      DEVICE_LITTLE_ENDIAN);
3095}
3096
3097uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3098                           MemTxAttrs attrs, MemTxResult *result)
3099{
3100    return address_space_ldq_internal(as, addr, attrs, result,
3101                                      DEVICE_BIG_ENDIAN);
3102}
3103
3104uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3105{
3106    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3107}
3108
3109uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3110{
3111    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3112}
3113
3114uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3115{
3116    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3117}
3118
3119/* XXX: optimize */
3120uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3121                            MemTxAttrs attrs, MemTxResult *result)
3122{
3123    uint8_t val;
3124    MemTxResult r;
3125
3126    r = address_space_rw(as, addr, attrs, &val, 1, 0);
3127    if (result) {
3128        *result = r;
3129    }
3130    return val;
3131}
3132
3133uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3134{
3135    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3136}
3137
3138/* warning: addr must be aligned */
3139static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3140                                                   hwaddr addr,
3141                                                   MemTxAttrs attrs,
3142                                                   MemTxResult *result,
3143                                                   enum device_endian endian)
3144{
3145    uint8_t *ptr;
3146    uint64_t val;
3147    MemoryRegion *mr;
3148    hwaddr l = 2;
3149    hwaddr addr1;
3150    MemTxResult r;
3151    bool release_lock = false;
3152
3153    rcu_read_lock();
3154    mr = address_space_translate(as, addr, &addr1, &l,
3155                                 false);
3156    if (l < 2 || !memory_access_is_direct(mr, false)) {
3157        release_lock |= prepare_mmio_access(mr);
3158
3159        /* I/O case */
3160        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3161#if defined(TARGET_WORDS_BIGENDIAN)
3162        if (endian == DEVICE_LITTLE_ENDIAN) {
3163            val = bswap16(val);
3164        }
3165#else
3166        if (endian == DEVICE_BIG_ENDIAN) {
3167            val = bswap16(val);
3168        }
3169#endif
3170    } else {
3171        /* RAM case */
3172        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3173                                & TARGET_PAGE_MASK)
3174                               + addr1);
3175        switch (endian) {
3176        case DEVICE_LITTLE_ENDIAN:
3177            val = lduw_le_p(ptr);
3178            break;
3179        case DEVICE_BIG_ENDIAN:
3180            val = lduw_be_p(ptr);
3181            break;
3182        default:
3183            val = lduw_p(ptr);
3184            break;
3185        }
3186        r = MEMTX_OK;
3187    }
3188    if (result) {
3189        *result = r;
3190    }
3191    if (release_lock) {
3192        qemu_mutex_unlock_iothread();
3193    }
3194    rcu_read_unlock();
3195    return val;
3196}
3197
3198uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3199                           MemTxAttrs attrs, MemTxResult *result)
3200{
3201    return address_space_lduw_internal(as, addr, attrs, result,
3202                                       DEVICE_NATIVE_ENDIAN);
3203}
3204
3205uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3206                           MemTxAttrs attrs, MemTxResult *result)
3207{
3208    return address_space_lduw_internal(as, addr, attrs, result,
3209                                       DEVICE_LITTLE_ENDIAN);
3210}
3211
3212uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3213                           MemTxAttrs attrs, MemTxResult *result)
3214{
3215    return address_space_lduw_internal(as, addr, attrs, result,
3216                                       DEVICE_BIG_ENDIAN);
3217}
3218
3219uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3220{
3221    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3222}
3223
3224uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3225{
3226    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3227}
3228
3229uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3230{
3231    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3232}
3233
3234/* warning: addr must be aligned. The ram page is not marked dirty for the
3235   code client and translated code inside it is not invalidated. It is
3236   useful if the dirty bits are used to track modified PTEs */
3237void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3238                                MemTxAttrs attrs, MemTxResult *result)
3239{
3240    uint8_t *ptr;
3241    MemoryRegion *mr;
3242    hwaddr l = 4;
3243    hwaddr addr1;
3244    MemTxResult r;
3245    uint8_t dirty_log_mask;
3246    bool release_lock = false;
3247
3248    rcu_read_lock();
3249    mr = address_space_translate(as, addr, &addr1, &l,
3250                                 true);
3251    if (l < 4 || !memory_access_is_direct(mr, true)) {
3252        release_lock |= prepare_mmio_access(mr);
3253
3254        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3255    } else {
3256        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3257        ptr = qemu_get_ram_ptr(addr1);
3258        stl_p(ptr, val);
3259
3260        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3261        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3262        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3263        r = MEMTX_OK;
3264    }
3265    if (result) {
3266        *result = r;
3267    }
3268    if (release_lock) {
3269        qemu_mutex_unlock_iothread();
3270    }
3271    rcu_read_unlock();
3272}
3273
3274void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3275{
3276    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3277}
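
    /*
     * Illustrative sketch (hypothetical helper and bit value): target code
     * that maintains accessed/dirty bits in guest page tables can use the
     * _notdirty variant so that updating a PTE neither dirties the page for
     * the code client nor invalidates translated code.
     */
    static void __attribute__((unused))
    example_set_pte_bit(AddressSpace *as, hwaddr pte_addr, uint32_t bit)
    {
        uint32_t pte = ldl_phys(as, pte_addr);

        stl_phys_notdirty(as, pte_addr, pte | bit);
    }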
3278
3279/* warning: addr must be aligned */
3280static inline void address_space_stl_internal(AddressSpace *as,
3281                                              hwaddr addr, uint32_t val,
3282                                              MemTxAttrs attrs,
3283                                              MemTxResult *result,
3284                                              enum device_endian endian)
3285{
3286    uint8_t *ptr;
3287    MemoryRegion *mr;
3288    hwaddr l = 4;
3289    hwaddr addr1;
3290    MemTxResult r;
3291    bool release_lock = false;
3292
3293    rcu_read_lock();
3294    mr = address_space_translate(as, addr, &addr1, &l,
3295                                 true);
3296    if (l < 4 || !memory_access_is_direct(mr, true)) {
3297        release_lock |= prepare_mmio_access(mr);
3298
3299#if defined(TARGET_WORDS_BIGENDIAN)
3300        if (endian == DEVICE_LITTLE_ENDIAN) {
3301            val = bswap32(val);
3302        }
3303#else
3304        if (endian == DEVICE_BIG_ENDIAN) {
3305            val = bswap32(val);
3306        }
3307#endif
3308        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3309    } else {
3310        /* RAM case */
3311        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3312        ptr = qemu_get_ram_ptr(addr1);
3313        switch (endian) {
3314        case DEVICE_LITTLE_ENDIAN:
3315            stl_le_p(ptr, val);
3316            break;
3317        case DEVICE_BIG_ENDIAN:
3318            stl_be_p(ptr, val);
3319            break;
3320        default:
3321            stl_p(ptr, val);
3322            break;
3323        }
3324        invalidate_and_set_dirty(mr, addr1, 4);
3325        r = MEMTX_OK;
3326    }
3327    if (result) {
3328        *result = r;
3329    }
3330    if (release_lock) {
3331        qemu_mutex_unlock_iothread();
3332    }
3333    rcu_read_unlock();
3334}
3335
3336void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3337                       MemTxAttrs attrs, MemTxResult *result)
3338{
3339    address_space_stl_internal(as, addr, val, attrs, result,
3340                               DEVICE_NATIVE_ENDIAN);
3341}
3342
3343void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3344                       MemTxAttrs attrs, MemTxResult *result)
3345{
3346    address_space_stl_internal(as, addr, val, attrs, result,
3347                               DEVICE_LITTLE_ENDIAN);
3348}
3349
3350void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3351                       MemTxAttrs attrs, MemTxResult *result)
3352{
3353    address_space_stl_internal(as, addr, val, attrs, result,
3354                               DEVICE_BIG_ENDIAN);
3355}
3356
3357void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3358{
3359    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3360}
3361
3362void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3363{
3364    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3365}
3366
3367void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3368{
3369    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3370}
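
    /*
     * Illustrative sketch (hypothetical helper): the _le/_be variants above
     * access guest memory in a fixed byte order regardless of
     * TARGET_WORDS_BIGENDIAN, which is what device models with a defined
     * register endianness want.
     */
    static uint32_t __attribute__((unused))
    example_toggle_le_field(AddressSpace *as, hwaddr addr, uint32_t mask)
    {
        uint32_t v = ldl_le_phys(as, addr) ^ mask;   /* always little-endian */

        stl_le_phys(as, addr, v);
        return v;
    }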
3371
3372/* XXX: optimize */
3373void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3374                       MemTxAttrs attrs, MemTxResult *result)
3375{
3376    uint8_t v = val;
3377    MemTxResult r;
3378
3379    r = address_space_rw(as, addr, attrs, &v, 1, 1);
3380    if (result) {
3381        *result = r;
3382    }
3383}
3384
3385void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3386{
3387    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3388}
3389
/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

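/*
 * Illustrative sketch only, not part of the original file: when a caller
 * cares about whether the store actually completed (for example when the
 * address may resolve to MMIO that can fault), it can pass a MemTxResult
 * out-parameter instead of using the stw_*_phys convenience wrappers,
 * which discard it.  The helper name and "status register" are hypothetical.
 */
static inline bool example_store_status_le(AddressSpace *as, hwaddr status,
                                           uint16_t code)
{
    MemTxResult res;

    address_space_stw_le(as, status, code, MEMTXATTRS_UNSPECIFIED, &res);
    return res == MEMTX_OK;
}
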
/* XXX: optimize */
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

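/*
 * Illustrative sketch only, not part of the original file: 64-bit guest
 * addresses (e.g. a DMA descriptor pointer) are usually published in the
 * byte order fixed by the device specification, so a device model would
 * pick the _le/_be variant rather than the native-endian stq_phys.  The
 * helper and the descriptor layout below are invented for the example.
 */
static inline void example_publish_desc_ptr(AddressSpace *as, hwaddr ring,
                                            uint64_t desc_gpa)
{
    /* Little-endian 64-bit pointer followed by a 16-bit "valid" flag. */
    stq_le_phys(as, ring, desc_gpa);
    stw_le_phys(as, ring + 8, 1);
}
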
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

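/*
 * Illustrative sketch only, not part of the original file: this is the kind
 * of call a debugger front end (such as the gdbstub) makes to peek at guest
 * memory through the CPU's virtual address space.  The helper below is
 * hypothetical.
 */
static inline bool example_peek_guest_u32(CPUState *cpu, target_ulong vaddr,
                                          uint32_t *out)
{
    uint8_t buf[4];

    /* Read four bytes (is_write == 0); fails if no page is mapped there. */
    if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;
    }
    /* Interpret the bytes in the target's byte order. */
    *out = ldl_p(buf);
    return true;
}
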
/*
 * Allows code that needs to deal with migration bitmaps, etc., to still be
 * built target-independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

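/*
 * Illustrative sketch only, not part of the original file: target-independent
 * code can size a per-page bitmap from this accessor instead of referring to
 * TARGET_PAGE_BITS directly.  The helper name is hypothetical.
 */
static inline uint64_t example_page_bitmap_bytes(uint64_t ram_bytes)
{
    uint64_t pages = ram_bytes >> qemu_target_page_bits();

    /* One bit per target page, rounded up to whole bytes. */
    return (pages + 7) / 8;
}
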
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big-endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

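/*
 * Illustrative sketch only, not part of the original file: a legacy device
 * whose fields are defined as "guest endian" has to consult the target's
 * byte order at run time.  The helper below is hypothetical and assumes a
 * little-endian host purely to keep the example short.
 */
static inline uint16_t example_cpu_to_guest16(uint16_t host_val)
{
    /* Swap only when the emulated target is big-endian. */
    return target_words_bigendian() ? bswap16(host_val) : host_val;
}
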
#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

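/*
 * Illustrative sketch only, not part of the original file: a caller can use
 * the predicate above to decide whether a guest-physical address is plain
 * RAM/ROMD (safe to touch directly) or must go through the MMIO path.  The
 * function below is a hypothetical usage example.
 */
static inline bool example_can_access_directly(hwaddr guest_pa)
{
    /* Only RAM- or ROMD-backed addresses are safe to dereference as memory. */
    return !cpu_physical_memory_is_io(guest_pa);
}
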
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}
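
/*
 * Illustrative sketch only, not part of the original file: the iterator
 * above hands each callback the block's idstr, host pointer, offset, used
 * length and the caller's opaque pointer, so a caller can, for instance,
 * total up the RAM currently in use.  The callback and accumulator below
 * are hypothetical.
 *
 * Usage (hypothetical):
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(example_sum_block_len, &total);
 */
static int example_sum_block_len(const char *block_name, void *host_addr,
                                 ram_addr_t offset, ram_addr_t length,
                                 void *opaque)
{
    uint64_t *total = opaque;

    (void)block_name;
    (void)host_addr;
    (void)offset;
    *total += length;
    return 0;   /* returning non-zero would stop the iteration */
}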
#endif