qemu/hw/i386/xen/xen-hvm.c
   1/*
   2 * Copyright (C) 2010       Citrix Ltd.
   3 *
   4 * This work is licensed under the terms of the GNU GPL, version 2.  See
   5 * the COPYING file in the top-level directory.
   6 *
   7 * Contributions after 2012-01-13 are licensed under the terms of the
   8 * GNU GPL, version 2 or (at your option) any later version.
   9 */
  10
  11#include "qemu/osdep.h"
  12
  13#include "cpu.h"
  14#include "hw/pci/pci.h"
  15#include "hw/pci/pci_host.h"
  16#include "hw/i386/pc.h"
  17#include "hw/i386/apic-msidef.h"
  18#include "hw/xen/xen_common.h"
  19#include "hw/xen/xen-legacy-backend.h"
  20#include "hw/xen/xen-bus.h"
  21#include "qapi/error.h"
  22#include "qapi/qapi-commands-misc.h"
  23#include "qemu/error-report.h"
  24#include "qemu/range.h"
  25#include "sysemu/xen-mapcache.h"
  26#include "trace.h"
  27#include "exec/address-spaces.h"
  28
  29#include <xen/hvm/ioreq.h>
  30#include <xen/hvm/e820.h>
  31
  32//#define DEBUG_XEN_HVM
  33
  34#ifdef DEBUG_XEN_HVM
  35#define DPRINTF(fmt, ...) \
  36    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
  37#else
  38#define DPRINTF(fmt, ...) \
  39    do { } while (0)
  40#endif
  41
  42static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
  43static MemoryRegion *framebuffer;
  44static bool xen_in_migration;
  45
  46/* Compatibility with older Xen versions */
  47
  48/* This allows QEMU to build on a system that has Xen 4.5 or earlier
  49 * installed.  This is here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
  50 * needs to be included before this block and hw/xen/xen_common.h needs to
  51 * be included before xen/hvm/ioreq.h
  52 */
  53#ifndef IOREQ_TYPE_VMWARE_PORT
  54#define IOREQ_TYPE_VMWARE_PORT  3
  55struct vmware_regs {
  56    uint32_t esi;
  57    uint32_t edi;
  58    uint32_t ebx;
  59    uint32_t ecx;
  60    uint32_t edx;
  61};
  62typedef struct vmware_regs vmware_regs_t;
  63
  64struct shared_vmport_iopage {
  65    struct vmware_regs vcpu_vmport_regs[1];
  66};
  67typedef struct shared_vmport_iopage shared_vmport_iopage_t;
  68#endif
  69
  70static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
  71{
  72    return shared_page->vcpu_ioreq[i].vp_eport;
  73}
  74static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
  75{
  76    return &shared_page->vcpu_ioreq[vcpu];
  77}
  78
  79#define BUFFER_IO_MAX_DELAY  100
  80
  81typedef struct XenPhysmap {
  82    hwaddr start_addr;
  83    ram_addr_t size;
  84    const char *name;
  85    hwaddr phys_offset;
  86
  87    QLIST_ENTRY(XenPhysmap) list;
  88} XenPhysmap;
  89
  90static QLIST_HEAD(, XenPhysmap) xen_physmap;
  91
  92typedef struct XenPciDevice {
  93    PCIDevice *pci_dev;
  94    uint32_t sbdf;
  95    QLIST_ENTRY(XenPciDevice) entry;
  96} XenPciDevice;
  97
  98typedef struct XenIOState {
  99    ioservid_t ioservid;
 100    shared_iopage_t *shared_page;
 101    shared_vmport_iopage_t *shared_vmport_page;
 102    buffered_iopage_t *buffered_io_page;
 103    QEMUTimer *buffered_io_timer;
 104    CPUState **cpu_by_vcpu_id;
 105    /* evtchn local ports for ioreq notifications, one per vcpu */
 106    evtchn_port_t *ioreq_local_port;
 107    /* evtchn remote and local ports for buffered io */
 108    evtchn_port_t bufioreq_remote_port;
 109    evtchn_port_t bufioreq_local_port;
 110    /* the evtchn fd for polling */
 111    xenevtchn_handle *xce_handle;
 112    /* which vcpu we are serving */
 113    int send_vcpu;
 114
 115    struct xs_handle *xenstore;
 116    MemoryListener memory_listener;
 117    MemoryListener io_listener;
 118    QLIST_HEAD(, XenPciDevice) dev_list;
 119    DeviceListener device_listener;
 120    hwaddr free_phys_offset;
 121    const XenPhysmap *log_for_dirtybit;
 122    /* Buffer used by xen_sync_dirty_bitmap */
 123    unsigned long *dirty_bitmap;
 124
 125    Notifier exit;
 126    Notifier suspend;
 127    Notifier wakeup;
 128} XenIOState;
 129
 130/* Xen specific function for piix pci */
 131
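/*
 * Encode (device, INTx pin) into a single irq number: irq_num + 4 * slot.
 * xen_piix3_set_irq() below decodes it again as device = irq >> 2 and
 * pin = irq & 3 before forwarding the level to Xen.
 */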
 132int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
 133{
 134    return irq_num + ((pci_dev->devfn >> 3) << 2);
 135}
 136
 137void xen_piix3_set_irq(void *opaque, int irq_num, int level)
 138{
 139    xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
 140                           irq_num & 3, level);
 141}
 142
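/*
 * Snoop guest writes to the PIIX3 configuration space and forward any
 * update of the PCI link routing registers (0x60-0x63) to Xen.  A value
 * with the disable bit (0x80) set clears the route.
 */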
 143void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
 144{
 145    int i;
 146
 147    /* Scan for updates to PCI link routes (0x60-0x63). */
 148    for (i = 0; i < len; i++) {
 149        uint8_t v = (val >> (8 * i)) & 0xff;
 150        if (v & 0x80) {
 151            v = 0;
 152        }
 153        v &= 0xf;
 154        if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
 155            xen_set_pci_link_route(xen_domid, address + i - 0x60, v);
 156        }
 157    }
 158}
 159
 160int xen_is_pirq_msi(uint32_t msi_data)
 161{
 162    /* If vector is 0, the msi is remapped into a pirq, passed as
 163     * dest_id.
 164     */
 165    return ((msi_data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT) == 0;
 166}
 167
 168void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
 169{
 170    xen_inject_msi(xen_domid, addr, data);
 171}
 172
 173static void xen_suspend_notifier(Notifier *notifier, void *data)
 174{
 175    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
 176}
 177
 178/* Xen Interrupt Controller */
 179
 180static void xen_set_irq(void *opaque, int irq, int level)
 181{
 182    xen_set_isa_irq_level(xen_domid, irq, level);
 183}
 184
 185qemu_irq *xen_interrupt_controller_init(void)
 186{
 187    return qemu_allocate_irqs(xen_set_irq, NULL, 16);
 188}
 189
 190/* Memory Ops */
 191
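/*
 * Lay out guest RAM for the PC machine: split it below/above 4G according
 * to ram_size and the max-ram-below-4g limit, then alias the 640k region,
 * the area above the VGA hole and (if present) the high memory into the
 * system address space.
 */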
 192static void xen_ram_init(PCMachineState *pcms,
 193                         ram_addr_t ram_size, MemoryRegion **ram_memory_p)
 194{
 195    MemoryRegion *sysmem = get_system_memory();
 196    ram_addr_t block_len;
 197    uint64_t user_lowmem = object_property_get_uint(qdev_get_machine(),
 198                                                    PC_MACHINE_MAX_RAM_BELOW_4G,
 199                                                    &error_abort);
 200
 201    /* Handle the machine option max-ram-below-4g.  It is effectively
 202     * min(xen limit, user limit).
 203     */
 204    if (!user_lowmem) {
 205        user_lowmem = HVM_BELOW_4G_RAM_END; /* default */
 206    }
 207    if (HVM_BELOW_4G_RAM_END <= user_lowmem) {
 208        user_lowmem = HVM_BELOW_4G_RAM_END;
 209    }
 210
 211    if (ram_size >= user_lowmem) {
 212        pcms->above_4g_mem_size = ram_size - user_lowmem;
 213        pcms->below_4g_mem_size = user_lowmem;
 214    } else {
 215        pcms->above_4g_mem_size = 0;
 216        pcms->below_4g_mem_size = ram_size;
 217    }
 218    if (!pcms->above_4g_mem_size) {
 219        block_len = ram_size;
 220    } else {
 221        /*
 222         * Xen does not allocate the memory contiguously; it keeps a
 223         * hole of the size computed above or passed in.
 224         */
 225        block_len = (1ULL << 32) + pcms->above_4g_mem_size;
 226    }
 227    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
 228                           &error_fatal);
 229    *ram_memory_p = &ram_memory;
 230
 231    memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
 232                             &ram_memory, 0, 0xa0000);
 233    memory_region_add_subregion(sysmem, 0, &ram_640k);
 234    /* Skip the VGA IO memory space; it will be registered later by the
 235     * emulated VGA device.
 236     *
 237     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
 238     * option ROMs, so it is registered here as RAM.
 239     */
 240    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
 241                             &ram_memory, 0xc0000,
 242                             pcms->below_4g_mem_size - 0xc0000);
 243    memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
 244    if (pcms->above_4g_mem_size > 0) {
 245        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
 246                                 &ram_memory, 0x100000000ULL,
 247                                 pcms->above_4g_mem_size);
 248        memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
 249    }
 250}
 251
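/*
 * Called when a new RAMBlock is allocated: ask Xen to populate the guest
 * physical frames backing it.  The main "xen.ram" region is skipped, as
 * its pages already exist in the domain, and nothing is populated while
 * an incoming migration is in progress.
 */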
 252void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
 253                   Error **errp)
 254{
 255    unsigned long nr_pfn;
 256    xen_pfn_t *pfn_list;
 257    int i;
 258
 259    if (runstate_check(RUN_STATE_INMIGRATE)) {
 260        /* RAM already populated in Xen */
 261        fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
 262                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
 263                __func__, size, ram_addr);
 264        return;
 265    }
 266
 267    if (mr == &ram_memory) {
 268        return;
 269    }
 270
 271    trace_xen_ram_alloc(ram_addr, size);
 272
 273    nr_pfn = size >> TARGET_PAGE_BITS;
 274    pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);
 275
 276    for (i = 0; i < nr_pfn; i++) {
 277        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
 278    }
 279
 280    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
 281        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
 282                   ram_addr);
 283    }
 284
 285    g_free(pfn_list);
 286}
 287
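/* Find the physmap entry, if any, that covers the given guest address. */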
 288static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size)
 289{
 290    XenPhysmap *physmap = NULL;
 291
 292    start_addr &= TARGET_PAGE_MASK;
 293
 294    QLIST_FOREACH(physmap, &xen_physmap, list) {
 295        if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
 296            return physmap;
 297        }
 298    }
 299    return NULL;
 300}
 301
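/*
 * Translate a RAM offset back to the guest physical address it has been
 * remapped to; addresses that were never remapped are returned unchanged.
 */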
 302static hwaddr xen_phys_offset_to_gaddr(hwaddr phys_offset, ram_addr_t size)
 303{
 304    hwaddr addr = phys_offset & TARGET_PAGE_MASK;
 305    XenPhysmap *physmap = NULL;
 306
 307    QLIST_FOREACH(physmap, &xen_physmap, list) {
 308        if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
 309            return physmap->start_addr + (phys_offset - physmap->phys_offset);
 310        }
 311    }
 312
 313    return phys_offset;
 314}
 315
 316#ifdef XEN_COMPAT_PHYSMAP
 317static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
 318{
 319    char path[80], value[17];
 320
 321    snprintf(path, sizeof(path),
 322            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
 323            xen_domid, (uint64_t)physmap->phys_offset);
 324    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->start_addr);
 325    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
 326        return -1;
 327    }
 328    snprintf(path, sizeof(path),
 329            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
 330            xen_domid, (uint64_t)physmap->phys_offset);
 331    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->size);
 332    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
 333        return -1;
 334    }
 335    if (physmap->name) {
 336        snprintf(path, sizeof(path),
 337                "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
 338                xen_domid, (uint64_t)physmap->phys_offset);
 339        if (!xs_write(state->xenstore, 0, path,
 340                      physmap->name, strlen(physmap->name))) {
 341            return -1;
 342        }
 343    }
 344    return 0;
 345}
 346#else
 347static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
 348{
 349    return 0;
 350}
 351#endif
 352
 353static int xen_add_to_physmap(XenIOState *state,
 354                              hwaddr start_addr,
 355                              ram_addr_t size,
 356                              MemoryRegion *mr,
 357                              hwaddr offset_within_region)
 358{
 359    unsigned long nr_pages;
 360    int rc = 0;
 361    XenPhysmap *physmap = NULL;
 362    hwaddr pfn, start_gpfn;
 363    hwaddr phys_offset = memory_region_get_ram_addr(mr);
 364    const char *mr_name;
 365
 366    if (get_physmapping(start_addr, size)) {
 367        return 0;
 368    }
 369    if (size <= 0) {
 370        return -1;
 371    }
 372
 373    /* Xen can only handle a single dirty log region for now and we want
 374     * the linear framebuffer to be that region.
 375     * Avoid tracking any region that is not videoram and avoid tracking
 376     * the legacy vga region. */
 377    if (mr == framebuffer && start_addr > 0xbffff) {
 378        goto go_physmap;
 379    }
 380    return -1;
 381
 382go_physmap:
 383    DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
 384            start_addr, start_addr + size);
 385
 386    mr_name = memory_region_name(mr);
 387
 388    physmap = g_malloc(sizeof(XenPhysmap));
 389
 390    physmap->start_addr = start_addr;
 391    physmap->size = size;
 392    physmap->name = mr_name;
 393    physmap->phys_offset = phys_offset;
 394
 395    QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
 396
 397    if (runstate_check(RUN_STATE_INMIGRATE)) {
 398        /* Now that we have a physmap entry we can replace the dummy mapping
 399         * with a real mapping of foreign guest memory. */
 400        uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size);
 401        assert(p && p == memory_region_get_ram_ptr(mr));
 402
 403        return 0;
 404    }
 405
 406    pfn = phys_offset >> TARGET_PAGE_BITS;
 407    start_gpfn = start_addr >> TARGET_PAGE_BITS;
 408    nr_pages = size >> TARGET_PAGE_BITS;
 409    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, nr_pages, pfn,
 410                                        start_gpfn);
 411    if (rc) {
 412        int saved_errno = errno;
 413
 414        error_report("relocate_memory %lu pages from GFN %"HWADDR_PRIx
 415                     " to GFN %"HWADDR_PRIx" failed: %s",
 416                     nr_pages, pfn, start_gpfn, strerror(saved_errno));
 417        errno = saved_errno;
 418        return -1;
 419    }
 420
 421    rc = xendevicemodel_pin_memory_cacheattr(xen_dmod, xen_domid,
 422                                   start_addr >> TARGET_PAGE_BITS,
 423                                   (start_addr + size - 1) >> TARGET_PAGE_BITS,
 424                                   XEN_DOMCTL_MEM_CACHEATTR_WB);
 425    if (rc) {
 426        error_report("pin_memory_cacheattr failed: %s", strerror(errno));
 427    }
 428    return xen_save_physmap(state, physmap);
 429}
 430
 431static int xen_remove_from_physmap(XenIOState *state,
 432                                   hwaddr start_addr,
 433                                   ram_addr_t size)
 434{
 435    int rc = 0;
 436    XenPhysmap *physmap = NULL;
 437    hwaddr phys_offset = 0;
 438
 439    physmap = get_physmapping(start_addr, size);
 440    if (physmap == NULL) {
 441        return -1;
 442    }
 443
 444    phys_offset = physmap->phys_offset;
 445    size = physmap->size;
 446
 447    DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
 448            "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);
 449
 450    size >>= TARGET_PAGE_BITS;
 451    start_addr >>= TARGET_PAGE_BITS;
 452    phys_offset >>= TARGET_PAGE_BITS;
 453    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, size, start_addr,
 454                                        phys_offset);
 455    if (rc) {
 456        int saved_errno = errno;
 457
 458        error_report("relocate_memory "RAM_ADDR_FMT" pages"
 459                     " from GFN %"HWADDR_PRIx
 460                     " to GFN %"HWADDR_PRIx" failed: %s",
 461                     size, start_addr, phys_offset, strerror(saved_errno));
 462        errno = saved_errno;
 463        return -1;
 464    }
 465
 466    QLIST_REMOVE(physmap, list);
 467    if (state->log_for_dirtybit == physmap) {
 468        state->log_for_dirtybit = NULL;
 469        g_free(state->dirty_bitmap);
 470        state->dirty_bitmap = NULL;
 471    }
 472    g_free(physmap);
 473
 474    return 0;
 475}
 476
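/*
 * Common helper for the region_add/region_del callbacks: keep the ioreq
 * server's view of the section in sync with QEMU and maintain physmap
 * entries for RAM sections (in practice only the framebuffer is remapped,
 * see xen_add_to_physmap()).
 */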
 477static void xen_set_memory(struct MemoryListener *listener,
 478                           MemoryRegionSection *section,
 479                           bool add)
 480{
 481    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 482    hwaddr start_addr = section->offset_within_address_space;
 483    ram_addr_t size = int128_get64(section->size);
 484    bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA);
 485    hvmmem_type_t mem_type;
 486
 487    if (section->mr == &ram_memory) {
 488        return;
 489    } else {
 490        if (add) {
 491            xen_map_memory_section(xen_domid, state->ioservid,
 492                                   section);
 493        } else {
 494            xen_unmap_memory_section(xen_domid, state->ioservid,
 495                                     section);
 496        }
 497    }
 498
 499    if (!memory_region_is_ram(section->mr)) {
 500        return;
 501    }
 502
 503    if (log_dirty != add) {
 504        return;
 505    }
 506
 507    trace_xen_client_set_memory(start_addr, size, log_dirty);
 508
 509    start_addr &= TARGET_PAGE_MASK;
 510    size = TARGET_PAGE_ALIGN(size);
 511
 512    if (add) {
 513        if (!memory_region_is_rom(section->mr)) {
 514            xen_add_to_physmap(state, start_addr, size,
 515                               section->mr, section->offset_within_region);
 516        } else {
 517            mem_type = HVMMEM_ram_ro;
 518            if (xen_set_mem_type(xen_domid, mem_type,
 519                                 start_addr >> TARGET_PAGE_BITS,
 520                                 size >> TARGET_PAGE_BITS)) {
 521                DPRINTF("xen_set_mem_type error, addr: "TARGET_FMT_plx"\n",
 522                        start_addr);
 523            }
 524        }
 525    } else {
 526        if (xen_remove_from_physmap(state, start_addr, size) < 0) {
 527            DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
 528        }
 529    }
 530}
 531
 532static void xen_region_add(MemoryListener *listener,
 533                           MemoryRegionSection *section)
 534{
 535    memory_region_ref(section->mr);
 536    xen_set_memory(listener, section, true);
 537}
 538
 539static void xen_region_del(MemoryListener *listener,
 540                           MemoryRegionSection *section)
 541{
 542    xen_set_memory(listener, section, false);
 543    memory_region_unref(section->mr);
 544}
 545
 546static void xen_io_add(MemoryListener *listener,
 547                       MemoryRegionSection *section)
 548{
 549    XenIOState *state = container_of(listener, XenIOState, io_listener);
 550    MemoryRegion *mr = section->mr;
 551
 552    if (mr->ops == &unassigned_io_ops) {
 553        return;
 554    }
 555
 556    memory_region_ref(mr);
 557
 558    xen_map_io_section(xen_domid, state->ioservid, section);
 559}
 560
 561static void xen_io_del(MemoryListener *listener,
 562                       MemoryRegionSection *section)
 563{
 564    XenIOState *state = container_of(listener, XenIOState, io_listener);
 565    MemoryRegion *mr = section->mr;
 566
 567    if (mr->ops == &unassigned_io_ops) {
 568        return;
 569    }
 570
 571    xen_unmap_io_section(xen_domid, state->ioservid, section);
 572
 573    memory_region_unref(mr);
 574}
 575
 576static void xen_device_realize(DeviceListener *listener,
 577                               DeviceState *dev)
 578{
 579    XenIOState *state = container_of(listener, XenIOState, device_listener);
 580
 581    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
 582        PCIDevice *pci_dev = PCI_DEVICE(dev);
 583        XenPciDevice *xendev = g_new(XenPciDevice, 1);
 584
 585        xendev->pci_dev = pci_dev;
 586        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
 587                                     pci_dev->devfn);
 588        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
 589
 590        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
 591    }
 592}
 593
 594static void xen_device_unrealize(DeviceListener *listener,
 595                                 DeviceState *dev)
 596{
 597    XenIOState *state = container_of(listener, XenIOState, device_listener);
 598
 599    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
 600        PCIDevice *pci_dev = PCI_DEVICE(dev);
 601        XenPciDevice *xendev, *next;
 602
 603        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
 604
 605        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
 606            if (xendev->pci_dev == pci_dev) {
 607                QLIST_REMOVE(xendev, entry);
 608                g_free(xendev);
 609                break;
 610            }
 611        }
 612    }
 613}
 614
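/*
 * Fetch the dirty VRAM bitmap for the tracked region from Xen and mark the
 * corresponding framebuffer pages dirty in QEMU.  Only one region can be
 * tracked at a time.
 */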
 615static void xen_sync_dirty_bitmap(XenIOState *state,
 616                                  hwaddr start_addr,
 617                                  ram_addr_t size)
 618{
 619    hwaddr npages = size >> TARGET_PAGE_BITS;
 620    const int width = sizeof(unsigned long) * 8;
 621    size_t bitmap_size = DIV_ROUND_UP(npages, width);
 622    int rc, i, j;
 623    const XenPhysmap *physmap = NULL;
 624
 625    physmap = get_physmapping(start_addr, size);
 626    if (physmap == NULL) {
 627        /* not handled */
 628        return;
 629    }
 630
 631    if (state->log_for_dirtybit == NULL) {
 632        state->log_for_dirtybit = physmap;
 633        state->dirty_bitmap = g_new(unsigned long, bitmap_size);
 634    } else if (state->log_for_dirtybit != physmap) {
 635        /* Only one range for dirty bitmap can be tracked. */
 636        return;
 637    }
 638
 639    rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
 640                              npages, state->dirty_bitmap);
 641    if (rc < 0) {
 642#ifndef ENODATA
 643#define ENODATA  ENOENT
 644#endif
 645        if (errno == ENODATA) {
 646            memory_region_set_dirty(framebuffer, 0, size);
 647            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
 648                    ", 0x" TARGET_FMT_plx "): %s\n",
 649                    start_addr, start_addr + size, strerror(errno));
 650        }
 651        return;
 652    }
 653
 654    for (i = 0; i < bitmap_size; i++) {
 655        unsigned long map = state->dirty_bitmap[i];
 656        while (map != 0) {
 657            j = ctzl(map);
 658            map &= ~(1ul << j);
 659            memory_region_set_dirty(framebuffer,
 660                                    (i * width + j) * TARGET_PAGE_SIZE,
 661                                    TARGET_PAGE_SIZE);
 662        };
 663    }
 664}
 665
 666static void xen_log_start(MemoryListener *listener,
 667                          MemoryRegionSection *section,
 668                          int old, int new)
 669{
 670    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 671
 672    if (new & ~old & (1 << DIRTY_MEMORY_VGA)) {
 673        xen_sync_dirty_bitmap(state, section->offset_within_address_space,
 674                              int128_get64(section->size));
 675    }
 676}
 677
 678static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
 679                         int old, int new)
 680{
 681    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 682
 683    if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
 684        state->log_for_dirtybit = NULL;
 685        g_free(state->dirty_bitmap);
 686        state->dirty_bitmap = NULL;
 687        /* Disable dirty bit tracking */
 688        xen_track_dirty_vram(xen_domid, 0, 0, NULL);
 689    }
 690}
 691
 692static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
 693{
 694    XenIOState *state = container_of(listener, XenIOState, memory_listener);
 695
 696    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
 697                          int128_get64(section->size));
 698}
 699
 700static void xen_log_global_start(MemoryListener *listener)
 701{
 702    if (xen_enabled()) {
 703        xen_in_migration = true;
 704    }
 705}
 706
 707static void xen_log_global_stop(MemoryListener *listener)
 708{
 709    xen_in_migration = false;
 710}
 711
 712static MemoryListener xen_memory_listener = {
 713    .region_add = xen_region_add,
 714    .region_del = xen_region_del,
 715    .log_start = xen_log_start,
 716    .log_stop = xen_log_stop,
 717    .log_sync = xen_log_sync,
 718    .log_global_start = xen_log_global_start,
 719    .log_global_stop = xen_log_global_stop,
 720    .priority = 10,
 721};
 722
 723static MemoryListener xen_io_listener = {
 724    .region_add = xen_io_add,
 725    .region_del = xen_io_del,
 726    .priority = 10,
 727};
 728
 729static DeviceListener xen_device_listener = {
 730    .realize = xen_device_realize,
 731    .unrealize = xen_device_unrealize,
 732};
 733
 734/* Get the pending ioreq for the given vcpu from the shared ioreq page. */
 735static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
 736{
 737    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
 738
 739    if (req->state != STATE_IOREQ_READY) {
 740        DPRINTF("I/O request not ready: "
 741                "%x, ptr: %x, port: %"PRIx64", "
 742                "data: %"PRIx64", count: %u, size: %u\n",
 743                req->state, req->data_is_ptr, req->addr,
 744                req->data, req->count, req->size);
 745        return NULL;
 746    }
 747
 748    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
 749
 750    req->state = STATE_IOREQ_INPROCESS;
 751    return req;
 752}
 753
 754/* Poll the event channel for a port notification and return the pending
 755 * ioreq for the signalled vcpu, or NULL if there is nothing to handle
 756 * (e.g. the notification was for the buffered-io port). */
 757static ioreq_t *cpu_get_ioreq(XenIOState *state)
 758{
 759    MachineState *ms = MACHINE(qdev_get_machine());
 760    unsigned int max_cpus = ms->smp.max_cpus;
 761    int i;
 762    evtchn_port_t port;
 763
 764    port = xenevtchn_pending(state->xce_handle);
 765    if (port == state->bufioreq_local_port) {
 766        timer_mod(state->buffered_io_timer,
 767                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
 768        return NULL;
 769    }
 770
 771    if (port != -1) {
 772        for (i = 0; i < max_cpus; i++) {
 773            if (state->ioreq_local_port[i] == port) {
 774                break;
 775            }
 776        }
 777
 778        if (i == max_cpus) {
 779            hw_error("Fatal error while trying to get io event!\n");
 780        }
 781
 782        /* unmask the wanted port again */
 783        xenevtchn_unmask(state->xce_handle, port);
 784
 785        /* get the io packet from shared memory */
 786        state->send_vcpu = i;
 787        return cpu_get_ioreq_from_shared_memory(state, i);
 788    }
 789
 790    /* read error or read nothing */
 791    return NULL;
 792}
 793
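/* do_inp()/do_outp() forward 1, 2 or 4 byte port accesses to the QEMU ioport layer. */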
 794static uint32_t do_inp(uint32_t addr, unsigned long size)
 795{
 796    switch (size) {
 797        case 1:
 798            return cpu_inb(addr);
 799        case 2:
 800            return cpu_inw(addr);
 801        case 4:
 802            return cpu_inl(addr);
 803        default:
 804            hw_error("inp: bad size: %04x %lx", addr, size);
 805    }
 806}
 807
 808static void do_outp(uint32_t addr,
 809        unsigned long size, uint32_t val)
 810{
 811    switch (size) {
 812        case 1:
 813            return cpu_outb(addr, val);
 814        case 2:
 815            return cpu_outw(addr, val);
 816        case 4:
 817            return cpu_outl(addr, val);
 818        default:
 819            hw_error("outp: bad size: %04x %lx", addr, size);
 820    }
 821}
 822
 823/*
 824 * Helper functions which read/write an object from/to physical guest
 825 * memory, as part of the implementation of an ioreq.
 826 *
 827 * Equivalent to
 828 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 829 *                          val, req->size, 0/1)
 830 * except without the integer overflow problems.
 831 */
 832static void rw_phys_req_item(hwaddr addr,
 833                             ioreq_t *req, uint32_t i, void *val, int rw)
 834{
 835    /* Do everything unsigned so overflow just results in a truncated result
 836     * and accesses to undesired parts of guest memory, which is up
 837     * to the guest */
 838    hwaddr offset = (hwaddr)req->size * i;
 839    if (req->df) {
 840        addr -= offset;
 841    } else {
 842        addr += offset;
 843    }
 844    cpu_physical_memory_rw(addr, val, req->size, rw);
 845}
 846
 847static inline void read_phys_req_item(hwaddr addr,
 848                                      ioreq_t *req, uint32_t i, void *val)
 849{
 850    rw_phys_req_item(addr, req, i, val, 0);
 851}
 852static inline void write_phys_req_item(hwaddr addr,
 853                                       ioreq_t *req, uint32_t i, void *val)
 854{
 855    rw_phys_req_item(addr, req, i, val, 1);
 856}
 857
 858
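/*
 * Emulate a port I/O request.  Scalar requests use req->data directly;
 * rep (data_is_ptr) requests copy each element between the port and the
 * guest buffer described by req->data.
 */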
 859static void cpu_ioreq_pio(ioreq_t *req)
 860{
 861    uint32_t i;
 862
 863    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
 864                         req->data, req->count, req->size);
 865
 866    if (req->size > sizeof(uint32_t)) {
 867        hw_error("PIO: bad size (%u)", req->size);
 868    }
 869
 870    if (req->dir == IOREQ_READ) {
 871        if (!req->data_is_ptr) {
 872            req->data = do_inp(req->addr, req->size);
 873            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
 874                                         req->size);
 875        } else {
 876            uint32_t tmp;
 877
 878            for (i = 0; i < req->count; i++) {
 879                tmp = do_inp(req->addr, req->size);
 880                write_phys_req_item(req->data, req, i, &tmp);
 881            }
 882        }
 883    } else if (req->dir == IOREQ_WRITE) {
 884        if (!req->data_is_ptr) {
 885            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
 886                                          req->size);
 887            do_outp(req->addr, req->size, req->data);
 888        } else {
 889            for (i = 0; i < req->count; i++) {
 890                uint32_t tmp = 0;
 891
 892                read_phys_req_item(req->data, req, i, &tmp);
 893                do_outp(req->addr, req->size, tmp);
 894            }
 895        }
 896    }
 897}
 898
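/*
 * Emulate an MMIO request: copy between guest memory at req->addr and
 * either req->data itself or, for data_is_ptr requests, a guest buffer.
 */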
 899static void cpu_ioreq_move(ioreq_t *req)
 900{
 901    uint32_t i;
 902
 903    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
 904                         req->data, req->count, req->size);
 905
 906    if (req->size > sizeof(req->data)) {
 907        hw_error("MMIO: bad size (%u)", req->size);
 908    }
 909
 910    if (!req->data_is_ptr) {
 911        if (req->dir == IOREQ_READ) {
 912            for (i = 0; i < req->count; i++) {
 913                read_phys_req_item(req->addr, req, i, &req->data);
 914            }
 915        } else if (req->dir == IOREQ_WRITE) {
 916            for (i = 0; i < req->count; i++) {
 917                write_phys_req_item(req->addr, req, i, &req->data);
 918            }
 919        }
 920    } else {
 921        uint64_t tmp;
 922
 923        if (req->dir == IOREQ_READ) {
 924            for (i = 0; i < req->count; i++) {
 925                read_phys_req_item(req->addr, req, i, &tmp);
 926                write_phys_req_item(req->data, req, i, &tmp);
 927            }
 928        } else if (req->dir == IOREQ_WRITE) {
 929            for (i = 0; i < req->count; i++) {
 930                read_phys_req_item(req->data, req, i, &tmp);
 931                write_phys_req_item(req->addr, req, i, &tmp);
 932            }
 933        }
 934    }
 935}
 936
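/*
 * Emulate a PCI config space ioreq.  The SBDF is carried in the upper
 * 32 bits of req->addr and the register offset in the lower bits; the
 * access is forwarded to the matching emulated PCI device, if any.
 */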
 937static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
 938{
 939    uint32_t sbdf = req->addr >> 32;
 940    uint32_t reg = req->addr;
 941    XenPciDevice *xendev;
 942
 943    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
 944        req->size != sizeof(uint32_t)) {
 945        hw_error("PCI config access: bad size (%u)", req->size);
 946    }
 947
 948    if (req->count != 1) {
 949        hw_error("PCI config access: bad count (%u)", req->count);
 950    }
 951
 952    QLIST_FOREACH(xendev, &state->dev_list, entry) {
 953        if (xendev->sbdf != sbdf) {
 954            continue;
 955        }
 956
 957        if (!req->data_is_ptr) {
 958            if (req->dir == IOREQ_READ) {
 959                req->data = pci_host_config_read_common(
 960                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 961                    req->size);
 962                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
 963                                            req->size, req->data);
 964            } else if (req->dir == IOREQ_WRITE) {
 965                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
 966                                             req->size, req->data);
 967                pci_host_config_write_common(
 968                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 969                    req->data, req->size);
 970            }
 971        } else {
 972            uint32_t tmp;
 973
 974            if (req->dir == IOREQ_READ) {
 975                tmp = pci_host_config_read_common(
 976                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 977                    req->size);
 978                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
 979                                            req->size, tmp);
 980                write_phys_req_item(req->data, req, 0, &tmp);
 981            } else if (req->dir == IOREQ_WRITE) {
 982                read_phys_req_item(req->data, req, 0, &tmp);
 983                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
 984                                             req->size, tmp);
 985                pci_host_config_write_common(
 986                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
 987                    tmp, req->size);
 988            }
 989        }
 990    }
 991}
 992
 993static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req)
 994{
 995    X86CPU *cpu;
 996    CPUX86State *env;
 997
 998    cpu = X86_CPU(current_cpu);
 999    env = &cpu->env;
1000    env->regs[R_EAX] = req->data;
1001    env->regs[R_EBX] = vmport_regs->ebx;
1002    env->regs[R_ECX] = vmport_regs->ecx;
1003    env->regs[R_EDX] = vmport_regs->edx;
1004    env->regs[R_ESI] = vmport_regs->esi;
1005    env->regs[R_EDI] = vmport_regs->edi;
1006}
1007
1008static void regs_from_cpu(vmware_regs_t *vmport_regs)
1009{
1010    X86CPU *cpu = X86_CPU(current_cpu);
1011    CPUX86State *env = &cpu->env;
1012
1013    vmport_regs->ebx = env->regs[R_EBX];
1014    vmport_regs->ecx = env->regs[R_ECX];
1015    vmport_regs->edx = env->regs[R_EDX];
1016    vmport_regs->esi = env->regs[R_ESI];
1017    vmport_regs->edi = env->regs[R_EDI];
1018}
1019
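/*
 * VMware port accesses carry the guest's general purpose registers in a
 * shared page: load them into the current CPU state, emulate the port
 * access, then copy the possibly updated registers back for Xen.
 */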
1020static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req)
1021{
1022    vmware_regs_t *vmport_regs;
1023
1024    assert(state->shared_vmport_page);
1025    vmport_regs =
1026        &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu];
1027    QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs));
1028
1029    current_cpu = state->cpu_by_vcpu_id[state->send_vcpu];
1030    regs_to_cpu(vmport_regs, req);
1031    cpu_ioreq_pio(req);
1032    regs_from_cpu(vmport_regs);
1033    current_cpu = NULL;
1034}
1035
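/* Dispatch one ioreq to the appropriate emulation handler. */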
1036static void handle_ioreq(XenIOState *state, ioreq_t *req)
1037{
1038    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
1039                       req->addr, req->data, req->count, req->size);
1040
1041    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
1042            (req->size < sizeof (target_ulong))) {
1043        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
1044    }
1045
1046    if (req->dir == IOREQ_WRITE)
1047        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
1048                                 req->addr, req->data, req->count, req->size);
1049
1050    switch (req->type) {
1051        case IOREQ_TYPE_PIO:
1052            cpu_ioreq_pio(req);
1053            break;
1054        case IOREQ_TYPE_COPY:
1055            cpu_ioreq_move(req);
1056            break;
1057        case IOREQ_TYPE_VMWARE_PORT:
1058            handle_vmport_ioreq(state, req);
1059            break;
1060        case IOREQ_TYPE_TIMEOFFSET:
1061            break;
1062        case IOREQ_TYPE_INVALIDATE:
1063            xen_invalidate_map_cache();
1064            break;
1065        case IOREQ_TYPE_PCI_CONFIG:
1066            cpu_ioreq_config(state, req);
1067            break;
1068        default:
1069            hw_error("Invalid ioreq type 0x%x\n", req->type);
1070    }
1071    if (req->dir == IOREQ_READ) {
1072        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
1073                                req->addr, req->data, req->count, req->size);
1074    }
1075}
1076
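/*
 * Drain the buffered-io ring.  Reads of the ring pointers are ordered with
 * barriers, 64-bit accesses occupy two consecutive slots, and the read
 * pointer is only advanced once a request has been handled.
 */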
1077static int handle_buffered_iopage(XenIOState *state)
1078{
1079    buffered_iopage_t *buf_page = state->buffered_io_page;
1080    buf_ioreq_t *buf_req = NULL;
1081    ioreq_t req;
1082    int qw;
1083
1084    if (!buf_page) {
1085        return 0;
1086    }
1087
1088    memset(&req, 0x00, sizeof(req));
1089    req.state = STATE_IOREQ_READY;
1090    req.count = 1;
1091    req.dir = IOREQ_WRITE;
1092
1093    for (;;) {
1094        uint32_t rdptr = buf_page->read_pointer, wrptr;
1095
1096        xen_rmb();
1097        wrptr = buf_page->write_pointer;
1098        xen_rmb();
1099        if (rdptr != buf_page->read_pointer) {
1100            continue;
1101        }
1102        if (rdptr == wrptr) {
1103            break;
1104        }
1105        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
1106        req.size = 1U << buf_req->size;
1107        req.addr = buf_req->addr;
1108        req.data = buf_req->data;
1109        req.type = buf_req->type;
1110        xen_rmb();
1111        qw = (req.size == 8);
1112        if (qw) {
1113            if (rdptr + 1 == wrptr) {
1114                hw_error("Incomplete quad word buffered ioreq");
1115            }
1116            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
1117                                           IOREQ_BUFFER_SLOT_NUM];
1118            req.data |= ((uint64_t)buf_req->data) << 32;
1119            xen_rmb();
1120        }
1121
1122        handle_ioreq(state, &req);
1123
1124        /* Only req.data may get updated by handle_ioreq(), albeit even that
1125         * should not happen as such data would never make it to the guest (we
1126         * can only usefully see writes here after all).
1127         */
1128        assert(req.state == STATE_IOREQ_READY);
1129        assert(req.count == 1);
1130        assert(req.dir == IOREQ_WRITE);
1131        assert(!req.data_is_ptr);
1132
1133        atomic_add(&buf_page->read_pointer, qw + 1);
1134    }
1135
1136    return req.count;
1137}
1138
1139static void handle_buffered_io(void *opaque)
1140{
1141    XenIOState *state = opaque;
1142
1143    if (handle_buffered_iopage(state)) {
1144        timer_mod(state->buffered_io_timer,
1145                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
1146    } else {
1147        timer_del(state->buffered_io_timer);
1148        xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
1149    }
1150}
1151
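/*
 * Event channel fd handler: service any buffered requests, then emulate
 * the synchronous ioreq of the signalled vcpu and notify Xen once the
 * response is ready.
 */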
1152static void cpu_handle_ioreq(void *opaque)
1153{
1154    XenIOState *state = opaque;
1155    ioreq_t *req = cpu_get_ioreq(state);
1156
1157    handle_buffered_iopage(state);
1158    if (req) {
1159        ioreq_t copy = *req;
1160
1161        xen_rmb();
1162        handle_ioreq(state, &copy);
1163        req->data = copy.data;
1164
1165        if (req->state != STATE_IOREQ_INPROCESS) {
1166            fprintf(stderr, "Badness in I/O request ... not in service?!: "
1167                    "%x, ptr: %x, port: %"PRIx64", "
1168                    "data: %"PRIx64", count: %u, size: %u, type: %u\n",
1169                    req->state, req->data_is_ptr, req->addr,
1170                    req->data, req->count, req->size, req->type);
1171            destroy_hvm_domain(false);
1172            return;
1173        }
1174
1175        xen_wmb(); /* Update ioreq contents /then/ update state. */
1176
1177        /*
1178         * We do this before we send the response so that the tools
1179         * have the opportunity to pick up on the reset before the
1180         * guest resumes and does a hlt with interrupts disabled which
1181         * causes Xen to powerdown the domain.
1182         */
1183        if (runstate_is_running()) {
1184            ShutdownCause request;
1185
1186            if (qemu_shutdown_requested_get()) {
1187                destroy_hvm_domain(false);
1188            }
1189            request = qemu_reset_requested_get();
1190            if (request) {
1191                qemu_system_reset(request);
1192                destroy_hvm_domain(true);
1193            }
1194        }
1195
1196        req->state = STATE_IORESP_READY;
1197        xenevtchn_notify(state->xce_handle,
1198                         state->ioreq_local_port[state->send_vcpu]);
1199    }
1200}
1201
1202static void xen_main_loop_prepare(XenIOState *state)
1203{
1204    int evtchn_fd = -1;
1205
1206    if (state->xce_handle != NULL) {
1207        evtchn_fd = xenevtchn_fd(state->xce_handle);
1208    }
1209
1210    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
1211                                                 state);
1212
1213    if (evtchn_fd != -1) {
1214        CPUState *cpu_state;
1215
1216        DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
1217        CPU_FOREACH(cpu_state) {
1218            DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
1219                    __func__, cpu_state->cpu_index, cpu_state);
1220            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
1221        }
1222        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
1223    }
1224}
1225
1226
1227static void xen_hvm_change_state_handler(void *opaque, int running,
1228                                         RunState rstate)
1229{
1230    XenIOState *state = opaque;
1231
1232    if (running) {
1233        xen_main_loop_prepare(state);
1234    }
1235
1236    xen_set_ioreq_server_state(xen_domid,
1237                               state->ioservid,
1238                               (rstate == RUN_STATE_RUNNING));
1239}
1240
1241static void xen_exit_notifier(Notifier *n, void *data)
1242{
1243    XenIOState *state = container_of(n, XenIOState, exit);
1244
1245    xenevtchn_close(state->xce_handle);
1246    xs_daemon_close(state->xenstore);
1247}
1248
1249#ifdef XEN_COMPAT_PHYSMAP
1250static void xen_read_physmap(XenIOState *state)
1251{
1252    XenPhysmap *physmap = NULL;
1253    unsigned int len, num, i;
1254    char path[80], *value = NULL;
1255    char **entries = NULL;
1256
1257    snprintf(path, sizeof(path),
1258            "/local/domain/0/device-model/%d/physmap", xen_domid);
1259    entries = xs_directory(state->xenstore, 0, path, &num);
1260    if (entries == NULL)
1261        return;
1262
1263    for (i = 0; i < num; i++) {
1264        physmap = g_malloc(sizeof (XenPhysmap));
1265        physmap->phys_offset = strtoull(entries[i], NULL, 16);
1266        snprintf(path, sizeof(path),
1267                "/local/domain/0/device-model/%d/physmap/%s/start_addr",
1268                xen_domid, entries[i]);
1269        value = xs_read(state->xenstore, 0, path, &len);
1270        if (value == NULL) {
1271            g_free(physmap);
1272            continue;
1273        }
1274        physmap->start_addr = strtoull(value, NULL, 16);
1275        free(value);
1276
1277        snprintf(path, sizeof(path),
1278                "/local/domain/0/device-model/%d/physmap/%s/size",
1279                xen_domid, entries[i]);
1280        value = xs_read(state->xenstore, 0, path, &len);
1281        if (value == NULL) {
1282            g_free(physmap);
1283            continue;
1284        }
1285        physmap->size = strtoull(value, NULL, 16);
1286        free(value);
1287
1288        snprintf(path, sizeof(path),
1289                "/local/domain/0/device-model/%d/physmap/%s/name",
1290                xen_domid, entries[i]);
1291        physmap->name = xs_read(state->xenstore, 0, path, &len);
1292
1293        QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
1294    }
1295    free(entries);
1296}
1297#else
1298static void xen_read_physmap(XenIOState *state)
1299{
1300}
1301#endif
1302
1303static void xen_wakeup_notifier(Notifier *notifier, void *data)
1304{
1305    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
1306}
1307
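/*
 * Map the ioreq server's shared and buffered-io pages into QEMU, using the
 * resource mapping API when available and falling back to mapping the
 * individual frames reported by xen_get_ioreq_server_info().
 */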
1308static int xen_map_ioreq_server(XenIOState *state)
1309{
1310    void *addr = NULL;
1311    xenforeignmemory_resource_handle *fres;
1312    xen_pfn_t ioreq_pfn;
1313    xen_pfn_t bufioreq_pfn;
1314    evtchn_port_t bufioreq_evtchn;
1315    int rc;
1316
1317    /*
1318     * Attempt to map using the resource API and fall back to normal
1319     * foreign mapping if this is not supported.
1320     */
1321    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
1322    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
1323    fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
1324                                         XENMEM_resource_ioreq_server,
1325                                         state->ioservid, 0, 2,
1326                                         &addr,
1327                                         PROT_READ | PROT_WRITE, 0);
1328    if (fres != NULL) {
1329        trace_xen_map_resource_ioreq(state->ioservid, addr);
1330        state->buffered_io_page = addr;
1331        state->shared_page = addr + TARGET_PAGE_SIZE;
1332    } else if (errno != EOPNOTSUPP) {
1333        error_report("failed to map ioreq server resources: error %d handle=%p",
1334                     errno, xen_xc);
1335        return -1;
1336    }
1337
1338    rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
1339                                   (state->shared_page == NULL) ?
1340                                   &ioreq_pfn : NULL,
1341                                   (state->buffered_io_page == NULL) ?
1342                                   &bufioreq_pfn : NULL,
1343                                   &bufioreq_evtchn);
1344    if (rc < 0) {
1345        error_report("failed to get ioreq server info: error %d handle=%p",
1346                     errno, xen_xc);
1347        return rc;
1348    }
1349
1350    if (state->shared_page == NULL) {
1351        DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
1352
1353        state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
1354                                                  PROT_READ | PROT_WRITE,
1355                                                  1, &ioreq_pfn, NULL);
1356        if (state->shared_page == NULL) {
1357            error_report("map shared IO page returned error %d handle=%p",
1358                         errno, xen_xc);
1359        }
1360    }
1361
1362    if (state->buffered_io_page == NULL) {
1363        DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
1364
1365        state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
1366                                                       PROT_READ | PROT_WRITE,
1367                                                       1, &bufioreq_pfn,
1368                                                       NULL);
1369        if (state->buffered_io_page == NULL) {
1370            error_report("map buffered IO page returned error %d", errno);
1371            return -1;
1372        }
1373    }
1374
1375    if (state->shared_page == NULL || state->buffered_io_page == NULL) {
1376        return -1;
1377    }
1378
1379    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
1380
1381    state->bufioreq_remote_port = bufioreq_evtchn;
1382
1383    return 0;
1384}
1385
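/*
 * Entry point for Xen HVM support: open the event channel and xenstore
 * handles, create and map the ioreq server, bind the per-vcpu and buffered
 * io event channels, set up the map cache and RAM layout, and register the
 * memory, I/O and device listeners.
 */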
1386void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
1387{
1388    MachineState *ms = MACHINE(pcms);
1389    unsigned int max_cpus = ms->smp.max_cpus;
1390    int i, rc;
1391    xen_pfn_t ioreq_pfn;
1392    XenIOState *state;
1393
1394    state = g_malloc0(sizeof (XenIOState));
1395
1396    state->xce_handle = xenevtchn_open(NULL, 0);
1397    if (state->xce_handle == NULL) {
1398        perror("xen: event channel open");
1399        goto err;
1400    }
1401
1402    state->xenstore = xs_daemon_open();
1403    if (state->xenstore == NULL) {
1404        perror("xen: xenstore open");
1405        goto err;
1406    }
1407
1408    xen_create_ioreq_server(xen_domid, &state->ioservid);
1409
1410    state->exit.notify = xen_exit_notifier;
1411    qemu_add_exit_notifier(&state->exit);
1412
1413    state->suspend.notify = xen_suspend_notifier;
1414    qemu_register_suspend_notifier(&state->suspend);
1415
1416    state->wakeup.notify = xen_wakeup_notifier;
1417    qemu_register_wakeup_notifier(&state->wakeup);
1418
1419    /*
1420     * Register wake-up support in QMP query-current-machine API
1421     */
1422    qemu_register_wakeup_support();
1423
1424    rc = xen_map_ioreq_server(state);
1425    if (rc < 0) {
1426        goto err;
1427    }
1428
1429    rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn);
1430    if (!rc) {
1431        DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn);
1432        state->shared_vmport_page =
1433            xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE,
1434                                 1, &ioreq_pfn, NULL);
1435        if (state->shared_vmport_page == NULL) {
1436            error_report("map shared vmport IO page returned error %d handle=%p",
1437                         errno, xen_xc);
1438            goto err;
1439        }
1440    } else if (rc != -ENOSYS) {
1441        error_report("get vmport regs pfn returned error %d, rc=%d",
1442                     errno, rc);
1443        goto err;
1444    }
1445
1446    /* Note: cpus is empty at this point in init */
1447    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));
1448
1449    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
1450    if (rc < 0) {
1451        error_report("failed to enable ioreq server info: error %d handle=%p",
1452                     errno, xen_xc);
1453        goto err;
1454    }
1455
1456    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));
1457
1458    /* FIXME: how about if we overflow the page here? */
1459    for (i = 0; i < max_cpus; i++) {
1460        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
1461                                        xen_vcpu_eport(state->shared_page, i));
1462        if (rc == -1) {
1463            error_report("shared evtchn %d bind error %d", i, errno);
1464            goto err;
1465        }
1466        state->ioreq_local_port[i] = rc;
1467    }
1468
1469    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
1470                                    state->bufioreq_remote_port);
1471    if (rc == -1) {
1472        error_report("buffered evtchn bind error %d", errno);
1473        goto err;
1474    }
1475    state->bufioreq_local_port = rc;
1476
1477    /* Init RAM management */
1478#ifdef XEN_COMPAT_PHYSMAP
1479    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
1480#else
1481    xen_map_cache_init(NULL, state);
1482#endif
1483    xen_ram_init(pcms, ram_size, ram_memory);
1484
1485    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
1486
1487    state->memory_listener = xen_memory_listener;
1488    memory_listener_register(&state->memory_listener, &address_space_memory);
1489    state->log_for_dirtybit = NULL;
1490
1491    state->io_listener = xen_io_listener;
1492    memory_listener_register(&state->io_listener, &address_space_io);
1493
1494    state->device_listener = xen_device_listener;
1495    QLIST_INIT(&state->dev_list);
1496    device_listener_register(&state->device_listener);
1497
1498    xen_bus_init();
1499
1500    /* Initialize backend core & drivers */
1501    if (xen_be_init() != 0) {
1502        error_report("xen backend core setup failed");
1503        goto err;
1504    }
1505    xen_be_register_common();
1506
1507    QLIST_INIT(&xen_physmap);
1508    xen_read_physmap(state);
1509
1510    /* Disable ACPI build because Xen handles it */
1511    pcms->acpi_build_enabled = false;
1512
1513    return;
1514
1515err:
1516    error_report("xen hardware virtual machine initialisation failed");
1517    exit(1);
1518}
1519
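/*
 * Ask Xen to power off or reboot the domain: first through the device
 * model interface, then, for hypervisors without that hypercall, through a
 * plain xenctrl handle.
 */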
1520void destroy_hvm_domain(bool reboot)
1521{
1522    xc_interface *xc_handle;
1523    int sts;
1524    int rc;
1525
1526    unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
1527
1528    if (xen_dmod) {
1529        rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
1530        if (!rc) {
1531            return;
1532        }
1533        if (errno != ENOTTY /* old Xen */) {
1534            perror("xendevicemodel_shutdown failed");
1535        }
1536        /* well, try the old thing then */
1537    }
1538
1539    xc_handle = xc_interface_open(0, 0, 0);
1540    if (xc_handle == NULL) {
1541        fprintf(stderr, "Cannot acquire xenctrl handle\n");
1542    } else {
1543        sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
1544        if (sts != 0) {
1545            fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
1546                    "sts %d, %s\n", reboot ? "reboot" : "poweroff",
1547                    sts, strerror(errno));
1548        } else {
1549            fprintf(stderr, "Issued domain %d %s\n", xen_domid,
1550                    reboot ? "reboot" : "poweroff");
1551        }
1552        xc_interface_close(xc_handle);
1553    }
1554}
1555
1556void xen_register_framebuffer(MemoryRegion *mr)
1557{
1558    framebuffer = mr;
1559}
1560
1561void xen_shutdown_fatal_error(const char *fmt, ...)
1562{
1563    va_list ap;
1564
1565    va_start(ap, fmt);
1566    vfprintf(stderr, fmt, ap);
1567    va_end(ap);
1568    fprintf(stderr, "Will destroy the domain.\n");
1569    /* destroy the domain */
1570    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
1571}
1572
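/*
 * During migration, report pages dirtied by QEMU itself to Xen so that its
 * dirty tracking stays in sync; addresses are translated through the
 * physmap first.
 */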
1573void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
1574{
1575    if (unlikely(xen_in_migration)) {
1576        int rc;
1577        ram_addr_t start_pfn, nb_pages;
1578
1579        start = xen_phys_offset_to_gaddr(start, length);
1580
1581        if (length == 0) {
1582            length = TARGET_PAGE_SIZE;
1583        }
1584        start_pfn = start >> TARGET_PAGE_BITS;
1585        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
1586            - start_pfn;
1587        rc = xen_modified_memory(xen_domid, start_pfn, nb_pages);
1588        if (rc) {
1589            fprintf(stderr,
1590                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
1591                    __func__, start, nb_pages, errno, strerror(errno));
1592        }
1593    }
1594}
1595
1596void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
1597{
1598    if (enable) {
1599        memory_global_dirty_log_start();
1600    } else {
1601        memory_global_dirty_log_stop();
1602    }
1603}
1604