qemu/hw/i386/xen/xen-hvm.c
   1/*
   2 * Copyright (C) 2010       Citrix Ltd.
   3 *
   4 * This work is licensed under the terms of the GNU GPL, version 2.  See
   5 * the COPYING file in the top-level directory.
   6 *
   7 * Contributions after 2012-01-13 are licensed under the terms of the
   8 * GNU GPL, version 2 or (at your option) any later version.
   9 */
  10
  11#include "qemu/osdep.h"
  12
  13#include "cpu.h"
  14#include "hw/pci/pci.h"
  15#include "hw/pci/pci_host.h"
  16#include "hw/i386/pc.h"
  17#include "hw/i386/apic-msidef.h"
  18#include "hw/xen/xen_common.h"
  19#include "hw/xen/xen_backend.h"
  20#include "qapi/error.h"
  21#include "qapi/qapi-commands-misc.h"
  22#include "qemu/error-report.h"
  23#include "qemu/range.h"
  24#include "sysemu/xen-mapcache.h"
  25#include "trace.h"
  26#include "exec/address-spaces.h"
  27
  28#include <xen/hvm/ioreq.h>
  29#include <xen/hvm/params.h>
  30#include <xen/hvm/e820.h>
  31
  32//#define DEBUG_XEN_HVM
  33
  34#ifdef DEBUG_XEN_HVM
  35#define DPRINTF(fmt, ...) \
  36    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
  37#else
  38#define DPRINTF(fmt, ...) \
  39    do { } while (0)
  40#endif
  41
  42static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
  43static MemoryRegion *framebuffer;
  44static bool xen_in_migration;
  45
   46/* Compatibility with older versions */
  47
  48/* This allows QEMU to build on a system that has Xen 4.5 or earlier
   49 * installed.  This is here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
  50 * needs to be included before this block and hw/xen/xen_common.h needs to
  51 * be included before xen/hvm/ioreq.h
  52 */
  53#ifndef IOREQ_TYPE_VMWARE_PORT
  54#define IOREQ_TYPE_VMWARE_PORT  3
  55struct vmware_regs {
  56    uint32_t esi;
  57    uint32_t edi;
  58    uint32_t ebx;
  59    uint32_t ecx;
  60    uint32_t edx;
  61};
  62typedef struct vmware_regs vmware_regs_t;
  63
  64struct shared_vmport_iopage {
  65    struct vmware_regs vcpu_vmport_regs[1];
  66};
  67typedef struct shared_vmport_iopage shared_vmport_iopage_t;
  68#endif
  69
  70static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
  71{
  72    return shared_page->vcpu_ioreq[i].vp_eport;
  73}
  74static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
  75{
  76    return &shared_page->vcpu_ioreq[vcpu];
  77}
  78
  79#define BUFFER_IO_MAX_DELAY  100
  80
  81typedef struct XenPhysmap {
  82    hwaddr start_addr;
  83    ram_addr_t size;
  84    const char *name;
  85    hwaddr phys_offset;
  86
  87    QLIST_ENTRY(XenPhysmap) list;
  88} XenPhysmap;
  89
  90static QLIST_HEAD(, XenPhysmap) xen_physmap;
  91
  92typedef struct XenPciDevice {
  93    PCIDevice *pci_dev;
  94    uint32_t sbdf;
  95    QLIST_ENTRY(XenPciDevice) entry;
  96} XenPciDevice;
  97
  98typedef struct XenIOState {
  99    ioservid_t ioservid;
 100    shared_iopage_t *shared_page;
 101    shared_vmport_iopage_t *shared_vmport_page;
 102    buffered_iopage_t *buffered_io_page;
 103    QEMUTimer *buffered_io_timer;
 104    CPUState **cpu_by_vcpu_id;
  105    /* the evtchn port for polling the notification */
 106    evtchn_port_t *ioreq_local_port;
 107    /* evtchn remote and local ports for buffered io */
 108    evtchn_port_t bufioreq_remote_port;
 109    evtchn_port_t bufioreq_local_port;
 110    /* the evtchn fd for polling */
 111    xenevtchn_handle *xce_handle;
 112    /* which vcpu we are serving */
 113    int send_vcpu;
 114
 115    struct xs_handle *xenstore;
 116    MemoryListener memory_listener;
 117    MemoryListener io_listener;
 118    QLIST_HEAD(, XenPciDevice) dev_list;
 119    DeviceListener device_listener;
 120    hwaddr free_phys_offset;
 121    const XenPhysmap *log_for_dirtybit;
 122
 123    Notifier exit;
 124    Notifier suspend;
 125    Notifier wakeup;
 126} XenIOState;
 127
  128/* Xen specific functions for piix pci */
 129
 130int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
 131{
 132    return irq_num + ((pci_dev->devfn >> 3) << 2);
 133}
 134
 135void xen_piix3_set_irq(void *opaque, int irq_num, int level)
 136{
 137    xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
 138                           irq_num & 3, level);
 139}
 140
 141void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
 142{
 143    int i;
 144
 145    /* Scan for updates to PCI link routes (0x60-0x63). */
 146    for (i = 0; i < len; i++) {
 147        uint8_t v = (val >> (8 * i)) & 0xff;
 148        if (v & 0x80) {
 149            v = 0;
 150        }
 151        v &= 0xf;
 152        if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
 153            xen_set_pci_link_route(xen_domid, address + i - 0x60, v);
 154        }
 155    }
 156}
 157
 158int xen_is_pirq_msi(uint32_t msi_data)
 159{
 160    /* If vector is 0, the msi is remapped into a pirq, passed as
 161     * dest_id.
 162     */
 163    return ((msi_data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT) == 0;
 164}
 165
 166void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
 167{
 168    xen_inject_msi(xen_domid, addr, data);
 169}
 170
 171static void xen_suspend_notifier(Notifier *notifier, void *data)
 172{
 173    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
 174}
 175
 176/* Xen Interrupt Controller */
 177
 178static void xen_set_irq(void *opaque, int irq, int level)
 179{
 180    xen_set_isa_irq_level(xen_domid, irq, level);
 181}
 182
 183qemu_irq *xen_interrupt_controller_init(void)
 184{
 185    return qemu_allocate_irqs(xen_set_irq, NULL, 16);
 186}
 187
 188/* Memory Ops */
 189
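/*
 * Allocate the guest RAM block and register the aliases that make up the
 * guest-physical layout: the first 640k, the area from 0xc0000 up to the
 * below-4G limit (the VGA hole is registered later by the VGA device) and,
 * if needed, the remainder above 4G.  The split honours the
 * max-ram-below-4g machine option, clamped to HVM_BELOW_4G_RAM_END.
 */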
 190static void xen_ram_init(PCMachineState *pcms,
 191                         ram_addr_t ram_size, MemoryRegion **ram_memory_p)
 192{
 193    MemoryRegion *sysmem = get_system_memory();
 194    ram_addr_t block_len;
 195    uint64_t user_lowmem = object_property_get_uint(qdev_get_machine(),
 196                                                    PC_MACHINE_MAX_RAM_BELOW_4G,
 197                                                    &error_abort);
 198
  199    /* Handle the machine option max-ram-below-4g.  It is basically doing
 200     * min(xen limit, user limit).
 201     */
 202    if (!user_lowmem) {
 203        user_lowmem = HVM_BELOW_4G_RAM_END; /* default */
 204    }
 205    if (HVM_BELOW_4G_RAM_END <= user_lowmem) {
 206        user_lowmem = HVM_BELOW_4G_RAM_END;
 207    }
 208
 209    if (ram_size >= user_lowmem) {
 210        pcms->above_4g_mem_size = ram_size - user_lowmem;
 211        pcms->below_4g_mem_size = user_lowmem;
 212    } else {
 213        pcms->above_4g_mem_size = 0;
 214        pcms->below_4g_mem_size = ram_size;
 215    }
 216    if (!pcms->above_4g_mem_size) {
 217        block_len = ram_size;
 218    } else {
 219        /*
  220         * Xen does not allocate the memory contiguously; it keeps a
 221         * hole of the size computed above or passed in.
 222         */
 223        block_len = (1ULL << 32) + pcms->above_4g_mem_size;
 224    }
 225    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
 226                           &error_fatal);
 227    *ram_memory_p = &ram_memory;
 228
 229    memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
 230                             &ram_memory, 0, 0xa0000);
 231    memory_region_add_subregion(sysmem, 0, &ram_640k);
  232    /* Skip the VGA IO memory space; it will be registered later by the VGA
 233     * emulated device.
 234     *
 235     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
 236     * the Options ROM, so it is registered here as RAM.
 237     */
 238    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
 239                             &ram_memory, 0xc0000,
 240                             pcms->below_4g_mem_size - 0xc0000);
 241    memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
 242    if (pcms->above_4g_mem_size > 0) {
 243        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
 244                                 &ram_memory, 0x100000000ULL,
 245                                 pcms->above_4g_mem_size);
 246        memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
 247    }
 248}
 249
 250void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
 251                   Error **errp)
 252{
 253    unsigned long nr_pfn;
 254    xen_pfn_t *pfn_list;
 255    int i;
 256
 257    if (runstate_check(RUN_STATE_INMIGRATE)) {
 258        /* RAM already populated in Xen */
 259        fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
 260                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
  261                __func__, size, ram_addr);
 262        return;
 263    }
 264
 265    if (mr == &ram_memory) {
 266        return;
 267    }
 268
 269    trace_xen_ram_alloc(ram_addr, size);
 270
 271    nr_pfn = size >> TARGET_PAGE_BITS;
 272    pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);
 273
 274    for (i = 0; i < nr_pfn; i++) {
 275        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
 276    }
 277
 278    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
 279        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
 280                   ram_addr);
 281    }
 282
 283    g_free(pfn_list);
 284}
 285
 286static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size)
 287{
 288    XenPhysmap *physmap = NULL;
 289
 290    start_addr &= TARGET_PAGE_MASK;
 291
 292    QLIST_FOREACH(physmap, &xen_physmap, list) {
 293        if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
 294            return physmap;
 295        }
 296    }
 297    return NULL;
 298}
 299
 300static hwaddr xen_phys_offset_to_gaddr(hwaddr phys_offset, ram_addr_t size)
 301{
 302    hwaddr addr = phys_offset & TARGET_PAGE_MASK;
 303    XenPhysmap *physmap = NULL;
 304
 305    QLIST_FOREACH(physmap, &xen_physmap, list) {
 306        if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
 307            return physmap->start_addr + (phys_offset - physmap->phys_offset);
 308        }
 309    }
 310
 311    return phys_offset;
 312}
 313
 314#ifdef XEN_COMPAT_PHYSMAP
 315static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
 316{
 317    char path[80], value[17];
 318
 319    snprintf(path, sizeof(path),
 320            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
 321            xen_domid, (uint64_t)physmap->phys_offset);
 322    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->start_addr);
 323    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
 324        return -1;
 325    }
 326    snprintf(path, sizeof(path),
 327            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
 328            xen_domid, (uint64_t)physmap->phys_offset);
 329    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->size);
 330    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
 331        return -1;
 332    }
 333    if (physmap->name) {
 334        snprintf(path, sizeof(path),
 335                "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
 336                xen_domid, (uint64_t)physmap->phys_offset);
 337        if (!xs_write(state->xenstore, 0, path,
 338                      physmap->name, strlen(physmap->name))) {
 339            return -1;
 340        }
 341    }
 342    return 0;
 343}
 344#else
 345static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
 346{
 347    return 0;
 348}
 349#endif
 350
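/*
 * Create a physmap entry for a memory section (currently only the
 * framebuffer is accepted): record it in the physmap list, relocate the
 * backing pages to the requested guest address, pin them write-back
 * cacheable and, with XEN_COMPAT_PHYSMAP, persist the entry to xenstore.
 */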
 351static int xen_add_to_physmap(XenIOState *state,
 352                              hwaddr start_addr,
 353                              ram_addr_t size,
 354                              MemoryRegion *mr,
 355                              hwaddr offset_within_region)
 356{
 357    unsigned long nr_pages;
 358    int rc = 0;
 359    XenPhysmap *physmap = NULL;
 360    hwaddr pfn, start_gpfn;
 361    hwaddr phys_offset = memory_region_get_ram_addr(mr);
 362    const char *mr_name;
 363
 364    if (get_physmapping(start_addr, size)) {
 365        return 0;
 366    }
 367    if (size <= 0) {
 368        return -1;
 369    }
 370
 371    /* Xen can only handle a single dirty log region for now and we want
 372     * the linear framebuffer to be that region.
  373     * Avoid tracking any region that is not videoram and avoid tracking
 374     * the legacy vga region. */
 375    if (mr == framebuffer && start_addr > 0xbffff) {
 376        goto go_physmap;
 377    }
 378    return -1;
 379
 380go_physmap:
 381    DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
 382            start_addr, start_addr + size);
 383
 384    mr_name = memory_region_name(mr);
 385
 386    physmap = g_malloc(sizeof(XenPhysmap));
 387
 388    physmap->start_addr = start_addr;
 389    physmap->size = size;
 390    physmap->name = mr_name;
 391    physmap->phys_offset = phys_offset;
 392
 393    QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
 394
 395    if (runstate_check(RUN_STATE_INMIGRATE)) {
  396        /* Now that we have a physmap entry we can replace the dummy mapping with
 397         * a real one of guest foreign memory. */
 398        uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size);
 399        assert(p && p == memory_region_get_ram_ptr(mr));
 400
 401        return 0;
 402    }
 403
 404    pfn = phys_offset >> TARGET_PAGE_BITS;
 405    start_gpfn = start_addr >> TARGET_PAGE_BITS;
 406    nr_pages = size >> TARGET_PAGE_BITS;
 407    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, nr_pages, pfn,
 408                                        start_gpfn);
 409    if (rc) {
 410        int saved_errno = errno;
 411
 412        error_report("relocate_memory %lu pages from GFN %"HWADDR_PRIx
 413                     " to GFN %"HWADDR_PRIx" failed: %s",
 414                     nr_pages, pfn, start_gpfn, strerror(saved_errno));
 415        errno = saved_errno;
 416        return -1;
 417    }
 418
 419    rc = xendevicemodel_pin_memory_cacheattr(xen_dmod, xen_domid,
 420                                   start_addr >> TARGET_PAGE_BITS,
 421                                   (start_addr + size - 1) >> TARGET_PAGE_BITS,
 422                                   XEN_DOMCTL_MEM_CACHEATTR_WB);
 423    if (rc) {
 424        error_report("pin_memory_cacheattr failed: %s", strerror(errno));
 425    }
 426    return xen_save_physmap(state, physmap);
 427}
 428
 429static int xen_remove_from_physmap(XenIOState *state,
 430                                   hwaddr start_addr,
 431                                   ram_addr_t size)
 432{
 433    int rc = 0;
 434    XenPhysmap *physmap = NULL;
 435    hwaddr phys_offset = 0;
 436
 437    physmap = get_physmapping(start_addr, size);
 438    if (physmap == NULL) {
 439        return -1;
 440    }
 441
 442    phys_offset = physmap->phys_offset;
 443    size = physmap->size;
 444
 445    DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
 446            "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);
 447
 448    size >>= TARGET_PAGE_BITS;
 449    start_addr >>= TARGET_PAGE_BITS;
 450    phys_offset >>= TARGET_PAGE_BITS;
 451    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, size, start_addr,
 452                                        phys_offset);
 453    if (rc) {
 454        int saved_errno = errno;
 455
 456        error_report("relocate_memory "RAM_ADDR_FMT" pages"
 457                     " from GFN %"HWADDR_PRIx
 458                     " to GFN %"HWADDR_PRIx" failed: %s",
 459                     size, start_addr, phys_offset, strerror(saved_errno));
 460        errno = saved_errno;
 461        return -1;
 462    }
 463
 464    QLIST_REMOVE(physmap, list);
 465    if (state->log_for_dirtybit == physmap) {
 466        state->log_for_dirtybit = NULL;
 467    }
 468    g_free(physmap);
 469
 470    return 0;
 471}
 472
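/*
 * Common handler for region_add/region_del: notify the ioreq server of
 * the section and, for RAM sections, keep the Xen physmap in sync
 * (ROM sections are marked read-only in the guest instead).
 */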
 473static void xen_set_memory(struct MemoryListener *listener,
 474                           MemoryRegionSection *section,
 475                           bool add)
 476{
 477    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 478    hwaddr start_addr = section->offset_within_address_space;
 479    ram_addr_t size = int128_get64(section->size);
 480    bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA);
 481    hvmmem_type_t mem_type;
 482
 483    if (section->mr == &ram_memory) {
 484        return;
 485    } else {
 486        if (add) {
 487            xen_map_memory_section(xen_domid, state->ioservid,
 488                                   section);
 489        } else {
 490            xen_unmap_memory_section(xen_domid, state->ioservid,
 491                                     section);
 492        }
 493    }
 494
 495    if (!memory_region_is_ram(section->mr)) {
 496        return;
 497    }
 498
 499    if (log_dirty != add) {
 500        return;
 501    }
 502
 503    trace_xen_client_set_memory(start_addr, size, log_dirty);
 504
 505    start_addr &= TARGET_PAGE_MASK;
 506    size = TARGET_PAGE_ALIGN(size);
 507
 508    if (add) {
 509        if (!memory_region_is_rom(section->mr)) {
 510            xen_add_to_physmap(state, start_addr, size,
 511                               section->mr, section->offset_within_region);
 512        } else {
 513            mem_type = HVMMEM_ram_ro;
 514            if (xen_set_mem_type(xen_domid, mem_type,
 515                                 start_addr >> TARGET_PAGE_BITS,
 516                                 size >> TARGET_PAGE_BITS)) {
 517                DPRINTF("xen_set_mem_type error, addr: "TARGET_FMT_plx"\n",
 518                        start_addr);
 519            }
 520        }
 521    } else {
 522        if (xen_remove_from_physmap(state, start_addr, size) < 0) {
 523            DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
 524        }
 525    }
 526}
 527
 528static void xen_region_add(MemoryListener *listener,
 529                           MemoryRegionSection *section)
 530{
 531    memory_region_ref(section->mr);
 532    xen_set_memory(listener, section, true);
 533}
 534
 535static void xen_region_del(MemoryListener *listener,
 536                           MemoryRegionSection *section)
 537{
 538    xen_set_memory(listener, section, false);
 539    memory_region_unref(section->mr);
 540}
 541
 542static void xen_io_add(MemoryListener *listener,
 543                       MemoryRegionSection *section)
 544{
 545    XenIOState *state = container_of(listener, XenIOState, io_listener);
 546    MemoryRegion *mr = section->mr;
 547
 548    if (mr->ops == &unassigned_io_ops) {
 549        return;
 550    }
 551
 552    memory_region_ref(mr);
 553
 554    xen_map_io_section(xen_domid, state->ioservid, section);
 555}
 556
 557static void xen_io_del(MemoryListener *listener,
 558                       MemoryRegionSection *section)
 559{
 560    XenIOState *state = container_of(listener, XenIOState, io_listener);
 561    MemoryRegion *mr = section->mr;
 562
 563    if (mr->ops == &unassigned_io_ops) {
 564        return;
 565    }
 566
 567    xen_unmap_io_section(xen_domid, state->ioservid, section);
 568
 569    memory_region_unref(mr);
 570}
 571
 572static void xen_device_realize(DeviceListener *listener,
 573                               DeviceState *dev)
 574{
 575    XenIOState *state = container_of(listener, XenIOState, device_listener);
 576
 577    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
 578        PCIDevice *pci_dev = PCI_DEVICE(dev);
 579        XenPciDevice *xendev = g_new(XenPciDevice, 1);
 580
 581        xendev->pci_dev = pci_dev;
 582        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
 583                                     pci_dev->devfn);
 584        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
 585
 586        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
 587    }
 588}
 589
 590static void xen_device_unrealize(DeviceListener *listener,
 591                                 DeviceState *dev)
 592{
 593    XenIOState *state = container_of(listener, XenIOState, device_listener);
 594
 595    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
 596        PCIDevice *pci_dev = PCI_DEVICE(dev);
 597        XenPciDevice *xendev, *next;
 598
 599        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
 600
 601        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
 602            if (xendev->pci_dev == pci_dev) {
 603                QLIST_REMOVE(xendev, entry);
 604                g_free(xendev);
 605                break;
 606            }
 607        }
 608    }
 609}
 610
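/*
 * Fetch Xen's dirty-VRAM bitmap for the tracked framebuffer range and
 * mark the corresponding pages dirty in QEMU's own bitmap.
 */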
 611static void xen_sync_dirty_bitmap(XenIOState *state,
 612                                  hwaddr start_addr,
 613                                  ram_addr_t size)
 614{
 615    hwaddr npages = size >> TARGET_PAGE_BITS;
 616    const int width = sizeof(unsigned long) * 8;
 617    unsigned long bitmap[DIV_ROUND_UP(npages, width)];
 618    int rc, i, j;
 619    const XenPhysmap *physmap = NULL;
 620
 621    physmap = get_physmapping(start_addr, size);
 622    if (physmap == NULL) {
 623        /* not handled */
 624        return;
 625    }
 626
 627    if (state->log_for_dirtybit == NULL) {
 628        state->log_for_dirtybit = physmap;
 629    } else if (state->log_for_dirtybit != physmap) {
 630        /* Only one range for dirty bitmap can be tracked. */
 631        return;
 632    }
 633
 634    rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
 635                              npages, bitmap);
 636    if (rc < 0) {
 637#ifndef ENODATA
 638#define ENODATA  ENOENT
 639#endif
 640        if (errno == ENODATA) {
 641            memory_region_set_dirty(framebuffer, 0, size);
 642            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
 643                    ", 0x" TARGET_FMT_plx "): %s\n",
 644                    start_addr, start_addr + size, strerror(errno));
 645        }
 646        return;
 647    }
 648
 649    for (i = 0; i < ARRAY_SIZE(bitmap); i++) {
 650        unsigned long map = bitmap[i];
 651        while (map != 0) {
 652            j = ctzl(map);
 653            map &= ~(1ul << j);
 654            memory_region_set_dirty(framebuffer,
 655                                    (i * width + j) * TARGET_PAGE_SIZE,
 656                                    TARGET_PAGE_SIZE);
  657        }
 658    }
 659}
 660
 661static void xen_log_start(MemoryListener *listener,
 662                          MemoryRegionSection *section,
 663                          int old, int new)
 664{
 665    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 666
 667    if (new & ~old & (1 << DIRTY_MEMORY_VGA)) {
 668        xen_sync_dirty_bitmap(state, section->offset_within_address_space,
 669                              int128_get64(section->size));
 670    }
 671}
 672
 673static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
 674                         int old, int new)
 675{
 676    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 677
 678    if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
 679        state->log_for_dirtybit = NULL;
 680        /* Disable dirty bit tracking */
 681        xen_track_dirty_vram(xen_domid, 0, 0, NULL);
 682    }
 683}
 684
 685static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
 686{
 687    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 688
 689    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
 690                          int128_get64(section->size));
 691}
 692
 693static void xen_log_global_start(MemoryListener *listener)
 694{
 695    if (xen_enabled()) {
 696        xen_in_migration = true;
 697    }
 698}
 699
 700static void xen_log_global_stop(MemoryListener *listener)
 701{
 702    xen_in_migration = false;
 703}
 704
 705static MemoryListener xen_memory_listener = {
 706    .region_add = xen_region_add,
 707    .region_del = xen_region_del,
 708    .log_start = xen_log_start,
 709    .log_stop = xen_log_stop,
 710    .log_sync = xen_log_sync,
 711    .log_global_start = xen_log_global_start,
 712    .log_global_stop = xen_log_global_stop,
 713    .priority = 10,
 714};
 715
 716static MemoryListener xen_io_listener = {
 717    .region_add = xen_io_add,
 718    .region_del = xen_io_del,
 719    .priority = 10,
 720};
 721
 722static DeviceListener xen_device_listener = {
 723    .realize = xen_device_realize,
 724    .unrealize = xen_device_unrealize,
 725};
 726
  727/* get the ioreq packet from shared memory */
 728static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
 729{
 730    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
 731
 732    if (req->state != STATE_IOREQ_READY) {
 733        DPRINTF("I/O request not ready: "
 734                "%x, ptr: %x, port: %"PRIx64", "
 735                "data: %"PRIx64", count: %u, size: %u\n",
 736                req->state, req->data_is_ptr, req->addr,
 737                req->data, req->count, req->size);
 738        return NULL;
 739    }
 740
 741    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
 742
 743    req->state = STATE_IOREQ_INPROCESS;
 744    return req;
 745}
 746
  747/* Poll the event channel for a pending port notification and return the
  748 * notifying vcpu's ioreq from shared memory, or NULL if there is nothing
  749 * to service (e.g. only buffered I/O is pending). */
 750static ioreq_t *cpu_get_ioreq(XenIOState *state)
 751{
 752    int i;
 753    evtchn_port_t port;
 754
 755    port = xenevtchn_pending(state->xce_handle);
 756    if (port == state->bufioreq_local_port) {
 757        timer_mod(state->buffered_io_timer,
 758                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
 759        return NULL;
 760    }
 761
 762    if (port != -1) {
 763        for (i = 0; i < max_cpus; i++) {
 764            if (state->ioreq_local_port[i] == port) {
 765                break;
 766            }
 767        }
 768
 769        if (i == max_cpus) {
 770            hw_error("Fatal error while trying to get io event!\n");
 771        }
 772
 773        /* unmask the wanted port again */
 774        xenevtchn_unmask(state->xce_handle, port);
 775
 776        /* get the io packet from shared memory */
 777        state->send_vcpu = i;
 778        return cpu_get_ioreq_from_shared_memory(state, i);
 779    }
 780
 781    /* read error or read nothing */
 782    return NULL;
 783}
 784
 785static uint32_t do_inp(uint32_t addr, unsigned long size)
 786{
 787    switch (size) {
 788        case 1:
 789            return cpu_inb(addr);
 790        case 2:
 791            return cpu_inw(addr);
 792        case 4:
 793            return cpu_inl(addr);
 794        default:
 795            hw_error("inp: bad size: %04x %lx", addr, size);
 796    }
 797}
 798
 799static void do_outp(uint32_t addr,
 800        unsigned long size, uint32_t val)
 801{
 802    switch (size) {
 803        case 1:
 804            return cpu_outb(addr, val);
 805        case 2:
 806            return cpu_outw(addr, val);
 807        case 4:
 808            return cpu_outl(addr, val);
 809        default:
 810            hw_error("outp: bad size: %04x %lx", addr, size);
 811    }
 812}
 813
 814/*
 815 * Helper functions which read/write an object from/to physical guest
 816 * memory, as part of the implementation of an ioreq.
 817 *
 818 * Equivalent to
 819 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 820 *                          val, req->size, 0/1)
 821 * except without the integer overflow problems.
 822 */
 823static void rw_phys_req_item(hwaddr addr,
 824                             ioreq_t *req, uint32_t i, void *val, int rw)
 825{
 826    /* Do everything unsigned so overflow just results in a truncated result
 827     * and accesses to undesired parts of guest memory, which is up
 828     * to the guest */
 829    hwaddr offset = (hwaddr)req->size * i;
 830    if (req->df) {
 831        addr -= offset;
 832    } else {
 833        addr += offset;
 834    }
 835    cpu_physical_memory_rw(addr, val, req->size, rw);
 836}
 837
 838static inline void read_phys_req_item(hwaddr addr,
 839                                      ioreq_t *req, uint32_t i, void *val)
 840{
 841    rw_phys_req_item(addr, req, i, val, 0);
 842}
 843static inline void write_phys_req_item(hwaddr addr,
 844                                       ioreq_t *req, uint32_t i, void *val)
 845{
 846    rw_phys_req_item(addr, req, i, val, 1);
 847}
 848
 849
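/*
 * Handle a port I/O request: a direct register access when the data is
 * inline, or a rep in/out that copies each item to/from guest memory
 * when data_is_ptr is set.
 */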
 850static void cpu_ioreq_pio(ioreq_t *req)
 851{
 852    uint32_t i;
 853
 854    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
 855                         req->data, req->count, req->size);
 856
 857    if (req->size > sizeof(uint32_t)) {
 858        hw_error("PIO: bad size (%u)", req->size);
 859    }
 860
 861    if (req->dir == IOREQ_READ) {
 862        if (!req->data_is_ptr) {
 863            req->data = do_inp(req->addr, req->size);
 864            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
 865                                         req->size);
 866        } else {
 867            uint32_t tmp;
 868
 869            for (i = 0; i < req->count; i++) {
 870                tmp = do_inp(req->addr, req->size);
 871                write_phys_req_item(req->data, req, i, &tmp);
 872            }
 873        }
 874    } else if (req->dir == IOREQ_WRITE) {
 875        if (!req->data_is_ptr) {
 876            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
 877                                          req->size);
 878            do_outp(req->addr, req->size, req->data);
 879        } else {
 880            for (i = 0; i < req->count; i++) {
 881                uint32_t tmp = 0;
 882
 883                read_phys_req_item(req->data, req, i, &tmp);
 884                do_outp(req->addr, req->size, tmp);
 885            }
 886        }
 887    }
 888}
 889
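/*
 * Handle an MMIO (copy) request, moving up to req->count items between
 * the target address and either the inline data field or a guest
 * memory buffer.
 */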
 890static void cpu_ioreq_move(ioreq_t *req)
 891{
 892    uint32_t i;
 893
 894    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
 895                         req->data, req->count, req->size);
 896
 897    if (req->size > sizeof(req->data)) {
 898        hw_error("MMIO: bad size (%u)", req->size);
 899    }
 900
 901    if (!req->data_is_ptr) {
 902        if (req->dir == IOREQ_READ) {
 903            for (i = 0; i < req->count; i++) {
 904                read_phys_req_item(req->addr, req, i, &req->data);
 905            }
 906        } else if (req->dir == IOREQ_WRITE) {
 907            for (i = 0; i < req->count; i++) {
 908                write_phys_req_item(req->addr, req, i, &req->data);
 909            }
 910        }
 911    } else {
 912        uint64_t tmp;
 913
 914        if (req->dir == IOREQ_READ) {
 915            for (i = 0; i < req->count; i++) {
 916                read_phys_req_item(req->addr, req, i, &tmp);
 917                write_phys_req_item(req->data, req, i, &tmp);
 918            }
 919        } else if (req->dir == IOREQ_WRITE) {
 920            for (i = 0; i < req->count; i++) {
 921                read_phys_req_item(req->data, req, i, &tmp);
 922                write_phys_req_item(req->addr, req, i, &tmp);
 923            }
 924        }
 925    }
 926}
 927
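/*
 * Handle a PCI config space request by forwarding it to the matching
 * emulated device: the SBDF is carried in the upper half of req->addr
 * and the register offset in the lower half.
 */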
 928static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
 929{
 930    uint32_t sbdf = req->addr >> 32;
 931    uint32_t reg = req->addr;
 932    XenPciDevice *xendev;
 933
 934    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
 935        req->size != sizeof(uint32_t)) {
 936        hw_error("PCI config access: bad size (%u)", req->size);
 937    }
 938
 939    if (req->count != 1) {
 940        hw_error("PCI config access: bad count (%u)", req->count);
 941    }
 942
 943    QLIST_FOREACH(xendev, &state->dev_list, entry) {
 944        if (xendev->sbdf != sbdf) {
 945            continue;
 946        }
 947
 948        if (!req->data_is_ptr) {
 949            if (req->dir == IOREQ_READ) {
 950                req->data = pci_host_config_read_common(
 951                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 952                    req->size);
 953                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
 954                                            req->size, req->data);
 955            } else if (req->dir == IOREQ_WRITE) {
 956                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
 957                                             req->size, req->data);
 958                pci_host_config_write_common(
 959                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 960                    req->data, req->size);
 961            }
 962        } else {
 963            uint32_t tmp;
 964
 965            if (req->dir == IOREQ_READ) {
 966                tmp = pci_host_config_read_common(
 967                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 968                    req->size);
 969                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
 970                                            req->size, tmp);
 971                write_phys_req_item(req->data, req, 0, &tmp);
 972            } else if (req->dir == IOREQ_WRITE) {
 973                read_phys_req_item(req->data, req, 0, &tmp);
 974                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
 975                                             req->size, tmp);
 976                pci_host_config_write_common(
 977                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 978                    tmp, req->size);
 979            }
 980        }
 981    }
 982}
 983
 984static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req)
 985{
 986    X86CPU *cpu;
 987    CPUX86State *env;
 988
 989    cpu = X86_CPU(current_cpu);
 990    env = &cpu->env;
 991    env->regs[R_EAX] = req->data;
 992    env->regs[R_EBX] = vmport_regs->ebx;
 993    env->regs[R_ECX] = vmport_regs->ecx;
 994    env->regs[R_EDX] = vmport_regs->edx;
 995    env->regs[R_ESI] = vmport_regs->esi;
 996    env->regs[R_EDI] = vmport_regs->edi;
 997}
 998
 999static void regs_from_cpu(vmware_regs_t *vmport_regs)
1000{
1001    X86CPU *cpu = X86_CPU(current_cpu);
1002    CPUX86State *env = &cpu->env;
1003
1004    vmport_regs->ebx = env->regs[R_EBX];
1005    vmport_regs->ecx = env->regs[R_ECX];
1006    vmport_regs->edx = env->regs[R_EDX];
1007    vmport_regs->esi = env->regs[R_ESI];
1008    vmport_regs->edi = env->regs[R_EDI];
1009}
1010
1011static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req)
1012{
1013    vmware_regs_t *vmport_regs;
1014
1015    assert(state->shared_vmport_page);
1016    vmport_regs =
1017        &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu];
1018    QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs));
1019
1020    current_cpu = state->cpu_by_vcpu_id[state->send_vcpu];
1021    regs_to_cpu(vmport_regs, req);
1022    cpu_ioreq_pio(req);
1023    regs_from_cpu(vmport_regs);
1024    current_cpu = NULL;
1025}
1026
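/* Dispatch a single ioreq to the handler for its type. */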
1027static void handle_ioreq(XenIOState *state, ioreq_t *req)
1028{
1029    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
1030                       req->addr, req->data, req->count, req->size);
1031
1032    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
1033            (req->size < sizeof (target_ulong))) {
1034        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
1035    }
1036
1037    if (req->dir == IOREQ_WRITE)
1038        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
1039                                 req->addr, req->data, req->count, req->size);
1040
1041    switch (req->type) {
1042        case IOREQ_TYPE_PIO:
1043            cpu_ioreq_pio(req);
1044            break;
1045        case IOREQ_TYPE_COPY:
1046            cpu_ioreq_move(req);
1047            break;
1048        case IOREQ_TYPE_VMWARE_PORT:
1049            handle_vmport_ioreq(state, req);
1050            break;
1051        case IOREQ_TYPE_TIMEOFFSET:
1052            break;
1053        case IOREQ_TYPE_INVALIDATE:
1054            xen_invalidate_map_cache();
1055            break;
1056        case IOREQ_TYPE_PCI_CONFIG:
1057            cpu_ioreq_config(state, req);
1058            break;
1059        default:
1060            hw_error("Invalid ioreq type 0x%x\n", req->type);
1061    }
1062    if (req->dir == IOREQ_READ) {
1063        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
1064                                req->addr, req->data, req->count, req->size);
1065    }
1066}
1067
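/*
 * Drain the buffered ioreq ring: reassemble each (possibly quad-word)
 * entry into a normal write request and handle it.
 */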
1068static int handle_buffered_iopage(XenIOState *state)
1069{
1070    buffered_iopage_t *buf_page = state->buffered_io_page;
1071    buf_ioreq_t *buf_req = NULL;
1072    ioreq_t req;
1073    int qw;
1074
1075    if (!buf_page) {
1076        return 0;
1077    }
1078
1079    memset(&req, 0x00, sizeof(req));
1080    req.state = STATE_IOREQ_READY;
1081    req.count = 1;
1082    req.dir = IOREQ_WRITE;
1083
1084    for (;;) {
1085        uint32_t rdptr = buf_page->read_pointer, wrptr;
1086
1087        xen_rmb();
1088        wrptr = buf_page->write_pointer;
1089        xen_rmb();
1090        if (rdptr != buf_page->read_pointer) {
1091            continue;
1092        }
1093        if (rdptr == wrptr) {
1094            break;
1095        }
1096        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
1097        req.size = 1U << buf_req->size;
1098        req.addr = buf_req->addr;
1099        req.data = buf_req->data;
1100        req.type = buf_req->type;
1101        xen_rmb();
1102        qw = (req.size == 8);
1103        if (qw) {
1104            if (rdptr + 1 == wrptr) {
1105                hw_error("Incomplete quad word buffered ioreq");
1106            }
1107            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
1108                                           IOREQ_BUFFER_SLOT_NUM];
1109            req.data |= ((uint64_t)buf_req->data) << 32;
1110            xen_rmb();
1111        }
1112
1113        handle_ioreq(state, &req);
1114
1115        /* Only req.data may get updated by handle_ioreq(), albeit even that
1116         * should not happen as such data would never make it to the guest (we
1117         * can only usefully see writes here after all).
1118         */
1119        assert(req.state == STATE_IOREQ_READY);
1120        assert(req.count == 1);
1121        assert(req.dir == IOREQ_WRITE);
1122        assert(!req.data_is_ptr);
1123
1124        atomic_add(&buf_page->read_pointer, qw + 1);
1125    }
1126
1127    return req.count;
1128}
1129
1130static void handle_buffered_io(void *opaque)
1131{
1132    XenIOState *state = opaque;
1133
1134    if (handle_buffered_iopage(state)) {
1135        timer_mod(state->buffered_io_timer,
1136                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
1137    } else {
1138        timer_del(state->buffered_io_timer);
1139        xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
1140    }
1141}
1142
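/*
 * Event channel handler: service any buffered requests, then fetch the
 * pending synchronous ioreq (if any), execute it, and notify Xen that
 * the response is ready.
 */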
1143static void cpu_handle_ioreq(void *opaque)
1144{
1145    XenIOState *state = opaque;
1146    ioreq_t *req = cpu_get_ioreq(state);
1147
1148    handle_buffered_iopage(state);
1149    if (req) {
1150        ioreq_t copy = *req;
1151
1152        xen_rmb();
1153        handle_ioreq(state, &copy);
1154        req->data = copy.data;
1155
1156        if (req->state != STATE_IOREQ_INPROCESS) {
1157            fprintf(stderr, "Badness in I/O request ... not in service?!: "
1158                    "%x, ptr: %x, port: %"PRIx64", "
1159                    "data: %"PRIx64", count: %u, size: %u, type: %u\n",
1160                    req->state, req->data_is_ptr, req->addr,
1161                    req->data, req->count, req->size, req->type);
1162            destroy_hvm_domain(false);
1163            return;
1164        }
1165
1166        xen_wmb(); /* Update ioreq contents /then/ update state. */
1167
1168        /*
1169         * We do this before we send the response so that the tools
1170         * have the opportunity to pick up on the reset before the
1171         * guest resumes and does a hlt with interrupts disabled which
1172         * causes Xen to powerdown the domain.
1173         */
1174        if (runstate_is_running()) {
1175            ShutdownCause request;
1176
1177            if (qemu_shutdown_requested_get()) {
1178                destroy_hvm_domain(false);
1179            }
1180            request = qemu_reset_requested_get();
1181            if (request) {
1182                qemu_system_reset(request);
1183                destroy_hvm_domain(true);
1184            }
1185        }
1186
1187        req->state = STATE_IORESP_READY;
1188        xenevtchn_notify(state->xce_handle,
1189                         state->ioreq_local_port[state->send_vcpu]);
1190    }
1191}
1192
1193static void xen_main_loop_prepare(XenIOState *state)
1194{
1195    int evtchn_fd = -1;
1196
1197    if (state->xce_handle != NULL) {
1198        evtchn_fd = xenevtchn_fd(state->xce_handle);
1199    }
1200
1201    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
1202                                                 state);
1203
1204    if (evtchn_fd != -1) {
1205        CPUState *cpu_state;
1206
1207        DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
1208        CPU_FOREACH(cpu_state) {
1209            DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
1210                    __func__, cpu_state->cpu_index, cpu_state);
1211            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
1212        }
1213        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
1214    }
1215}
1216
1217
1218static void xen_hvm_change_state_handler(void *opaque, int running,
1219                                         RunState rstate)
1220{
1221    XenIOState *state = opaque;
1222
1223    if (running) {
1224        xen_main_loop_prepare(state);
1225    }
1226
1227    xen_set_ioreq_server_state(xen_domid,
1228                               state->ioservid,
1229                               (rstate == RUN_STATE_RUNNING));
1230}
1231
1232static void xen_exit_notifier(Notifier *n, void *data)
1233{
1234    XenIOState *state = container_of(n, XenIOState, exit);
1235
1236    xenevtchn_close(state->xce_handle);
1237    xs_daemon_close(state->xenstore);
1238}
1239
1240#ifdef XEN_COMPAT_PHYSMAP
1241static void xen_read_physmap(XenIOState *state)
1242{
1243    XenPhysmap *physmap = NULL;
1244    unsigned int len, num, i;
1245    char path[80], *value = NULL;
1246    char **entries = NULL;
1247
1248    snprintf(path, sizeof(path),
1249            "/local/domain/0/device-model/%d/physmap", xen_domid);
1250    entries = xs_directory(state->xenstore, 0, path, &num);
1251    if (entries == NULL)
1252        return;
1253
1254    for (i = 0; i < num; i++) {
1255        physmap = g_malloc(sizeof (XenPhysmap));
1256        physmap->phys_offset = strtoull(entries[i], NULL, 16);
1257        snprintf(path, sizeof(path),
1258                "/local/domain/0/device-model/%d/physmap/%s/start_addr",
1259                xen_domid, entries[i]);
1260        value = xs_read(state->xenstore, 0, path, &len);
1261        if (value == NULL) {
1262            g_free(physmap);
1263            continue;
1264        }
1265        physmap->start_addr = strtoull(value, NULL, 16);
1266        free(value);
1267
1268        snprintf(path, sizeof(path),
1269                "/local/domain/0/device-model/%d/physmap/%s/size",
1270                xen_domid, entries[i]);
1271        value = xs_read(state->xenstore, 0, path, &len);
1272        if (value == NULL) {
1273            g_free(physmap);
1274            continue;
1275        }
1276        physmap->size = strtoull(value, NULL, 16);
1277        free(value);
1278
1279        snprintf(path, sizeof(path),
1280                "/local/domain/0/device-model/%d/physmap/%s/name",
1281                xen_domid, entries[i]);
1282        physmap->name = xs_read(state->xenstore, 0, path, &len);
1283
1284        QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
1285    }
1286    free(entries);
1287}
1288#else
1289static void xen_read_physmap(XenIOState *state)
1290{
1291}
1292#endif
1293
1294static void xen_wakeup_notifier(Notifier *notifier, void *data)
1295{
1296    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
1297}
1298
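/*
 * Map the ioreq server's shared and buffered-io pages into QEMU and
 * record the buffered-io event channel.
 */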
1299static int xen_map_ioreq_server(XenIOState *state)
1300{
1301    void *addr = NULL;
1302    xenforeignmemory_resource_handle *fres;
1303    xen_pfn_t ioreq_pfn;
1304    xen_pfn_t bufioreq_pfn;
1305    evtchn_port_t bufioreq_evtchn;
1306    int rc;
1307
1308    /*
1309     * Attempt to map using the resource API and fall back to normal
1310     * foreign mapping if this is not supported.
1311     */
1312    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
1313    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
1314    fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
1315                                         XENMEM_resource_ioreq_server,
1316                                         state->ioservid, 0, 2,
1317                                         &addr,
1318                                         PROT_READ | PROT_WRITE, 0);
1319    if (fres != NULL) {
1320        trace_xen_map_resource_ioreq(state->ioservid, addr);
1321        state->buffered_io_page = addr;
1322        state->shared_page = addr + TARGET_PAGE_SIZE;
1323    } else if (errno != EOPNOTSUPP) {
1324        error_report("failed to map ioreq server resources: error %d handle=%p",
1325                     errno, xen_xc);
1326        return -1;
1327    }
1328
1329    rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
1330                                   (state->shared_page == NULL) ?
1331                                   &ioreq_pfn : NULL,
1332                                   (state->buffered_io_page == NULL) ?
1333                                   &bufioreq_pfn : NULL,
1334                                   &bufioreq_evtchn);
1335    if (rc < 0) {
1336        error_report("failed to get ioreq server info: error %d handle=%p",
1337                     errno, xen_xc);
1338        return rc;
1339    }
1340
1341    if (state->shared_page == NULL) {
1342        DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
1343
1344        state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
1345                                                  PROT_READ | PROT_WRITE,
1346                                                  1, &ioreq_pfn, NULL);
1347        if (state->shared_page == NULL) {
1348            error_report("map shared IO page returned error %d handle=%p",
1349                         errno, xen_xc);
1350        }
1351    }
1352
1353    if (state->buffered_io_page == NULL) {
1354        DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
1355
1356        state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
1357                                                       PROT_READ | PROT_WRITE,
1358                                                       1, &bufioreq_pfn,
1359                                                       NULL);
1360        if (state->buffered_io_page == NULL) {
1361            error_report("map buffered IO page returned error %d", errno);
1362            return -1;
1363        }
1364    }
1365
1366    if (state->shared_page == NULL || state->buffered_io_page == NULL) {
1367        return -1;
1368    }
1369
1370    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
1371
1372    state->bufioreq_remote_port = bufioreq_evtchn;
1373
1374    return 0;
1375}
1376
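/*
 * Top-level HVM accelerator setup: open the event channel and xenstore
 * handles, create and map the ioreq server, bind the per-vcpu and
 * buffered-io event channels, initialise guest RAM and register the
 * memory, I/O and device listeners.
 */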
1377void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
1378{
1379    int i, rc;
1380    xen_pfn_t ioreq_pfn;
1381    XenIOState *state;
1382
1383    state = g_malloc0(sizeof (XenIOState));
1384
1385    state->xce_handle = xenevtchn_open(NULL, 0);
1386    if (state->xce_handle == NULL) {
1387        perror("xen: event channel open");
1388        goto err;
1389    }
1390
1391    state->xenstore = xs_daemon_open();
1392    if (state->xenstore == NULL) {
1393        perror("xen: xenstore open");
1394        goto err;
1395    }
1396
1397    xen_create_ioreq_server(xen_domid, &state->ioservid);
1398
1399    state->exit.notify = xen_exit_notifier;
1400    qemu_add_exit_notifier(&state->exit);
1401
1402    state->suspend.notify = xen_suspend_notifier;
1403    qemu_register_suspend_notifier(&state->suspend);
1404
1405    state->wakeup.notify = xen_wakeup_notifier;
1406    qemu_register_wakeup_notifier(&state->wakeup);
1407
1408    rc = xen_map_ioreq_server(state);
1409    if (rc < 0) {
1410        goto err;
1411    }
1412
1413    rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn);
1414    if (!rc) {
1415        DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn);
1416        state->shared_vmport_page =
1417            xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE,
1418                                 1, &ioreq_pfn, NULL);
1419        if (state->shared_vmport_page == NULL) {
1420            error_report("map shared vmport IO page returned error %d handle=%p",
1421                         errno, xen_xc);
1422            goto err;
1423        }
1424    } else if (rc != -ENOSYS) {
1425        error_report("get vmport regs pfn returned error %d, rc=%d",
1426                     errno, rc);
1427        goto err;
1428    }
1429
1430    /* Note: cpus is empty at this point in init */
1431    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));
1432
1433    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
1434    if (rc < 0) {
1435        error_report("failed to enable ioreq server info: error %d handle=%p",
1436                     errno, xen_xc);
1437        goto err;
1438    }
1439
1440    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));
1441
 1442    /* FIXME: what happens if we overflow the page here? */
1443    for (i = 0; i < max_cpus; i++) {
1444        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
1445                                        xen_vcpu_eport(state->shared_page, i));
1446        if (rc == -1) {
1447            error_report("shared evtchn %d bind error %d", i, errno);
1448            goto err;
1449        }
1450        state->ioreq_local_port[i] = rc;
1451    }
1452
1453    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
1454                                    state->bufioreq_remote_port);
1455    if (rc == -1) {
1456        error_report("buffered evtchn bind error %d", errno);
1457        goto err;
1458    }
1459    state->bufioreq_local_port = rc;
1460
1461    /* Init RAM management */
1462#ifdef XEN_COMPAT_PHYSMAP
1463    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
1464#else
1465    xen_map_cache_init(NULL, state);
1466#endif
1467    xen_ram_init(pcms, ram_size, ram_memory);
1468
1469    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
1470
1471    state->memory_listener = xen_memory_listener;
1472    memory_listener_register(&state->memory_listener, &address_space_memory);
1473    state->log_for_dirtybit = NULL;
1474
1475    state->io_listener = xen_io_listener;
1476    memory_listener_register(&state->io_listener, &address_space_io);
1477
1478    state->device_listener = xen_device_listener;
1479    QLIST_INIT(&state->dev_list);
1480    device_listener_register(&state->device_listener);
1481
1482    /* Initialize backend core & drivers */
1483    if (xen_be_init() != 0) {
1484        error_report("xen backend core setup failed");
1485        goto err;
1486    }
1487    xen_be_register_common();
1488
1489    QLIST_INIT(&xen_physmap);
1490    xen_read_physmap(state);
1491
1492    /* Disable ACPI build because Xen handles it */
1493    pcms->acpi_build_enabled = false;
1494
1495    return;
1496
1497err:
1498    error_report("xen hardware virtual machine initialisation failed");
1499    exit(1);
1500}
1501
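/*
 * Ask Xen to power off or reboot the domain, preferring the device model
 * shutdown call and falling back to xc_domain_shutdown() on older Xen.
 */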
1502void destroy_hvm_domain(bool reboot)
1503{
1504    xc_interface *xc_handle;
1505    int sts;
1506    int rc;
1507
1508    unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
1509
1510    if (xen_dmod) {
1511        rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
1512        if (!rc) {
1513            return;
1514        }
1515        if (errno != ENOTTY /* old Xen */) {
1516            perror("xendevicemodel_shutdown failed");
1517        }
1518        /* well, try the old thing then */
1519    }
1520
1521    xc_handle = xc_interface_open(0, 0, 0);
1522    if (xc_handle == NULL) {
1523        fprintf(stderr, "Cannot acquire xenctrl handle\n");
1524    } else {
1525        sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
1526        if (sts != 0) {
1527            fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
1528                    "sts %d, %s\n", reboot ? "reboot" : "poweroff",
1529                    sts, strerror(errno));
1530        } else {
1531            fprintf(stderr, "Issued domain %d %s\n", xen_domid,
1532                    reboot ? "reboot" : "poweroff");
1533        }
1534        xc_interface_close(xc_handle);
1535    }
1536}
1537
1538void xen_register_framebuffer(MemoryRegion *mr)
1539{
1540    framebuffer = mr;
1541}
1542
1543void xen_shutdown_fatal_error(const char *fmt, ...)
1544{
1545    va_list ap;
1546
1547    va_start(ap, fmt);
1548    vfprintf(stderr, fmt, ap);
1549    va_end(ap);
1550    fprintf(stderr, "Will destroy the domain.\n");
1551    /* destroy the domain */
1552    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
1553}
1554
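/*
 * During migration, report pages modified by QEMU itself to Xen so that
 * they are included in the dirty log.
 */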
1555void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
1556{
1557    if (unlikely(xen_in_migration)) {
1558        int rc;
1559        ram_addr_t start_pfn, nb_pages;
1560
1561        start = xen_phys_offset_to_gaddr(start, length);
1562
1563        if (length == 0) {
1564            length = TARGET_PAGE_SIZE;
1565        }
1566        start_pfn = start >> TARGET_PAGE_BITS;
1567        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
1568            - start_pfn;
1569        rc = xen_modified_memory(xen_domid, start_pfn, nb_pages);
1570        if (rc) {
1571            fprintf(stderr,
1572                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
1573                    __func__, start, nb_pages, errno, strerror(errno));
1574        }
1575    }
1576}
1577
1578void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
1579{
1580    if (enable) {
1581        memory_global_dirty_log_start();
1582    } else {
1583        memory_global_dirty_log_stop();
1584    }
1585}
1586