qemu/hw/remote/vfio-user-obj.c
/**
 * QEMU vfio-user-server server object
 *
 * Copyright © 2022 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
 *
 * See the COPYING file in the top-level directory.
 *
 */

/**
 * Usage: add options:
 *     -machine x-remote,vfio-user=on,auto-shutdown=on
 *     -device <PCI-device>,id=<pci-dev-id>
 *     -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
 *             device=<pci-dev-id>
 *
 * Note that the x-vfio-user-server object must be used with the x-remote
 * machine only. This server currently supports only PCI devices.
 *
 * type - SocketAddress type - presently only "unix" is supported. This is
 *        a required option.
 *
 * path - path of the named unix socket, which will be created by the
 *        server. This is a required option.
 *
 * device - id of a device on the server; a required option. Only PCI
 *          devices are presently supported.
 *
 * notes - x-vfio-user-server could block IO and the monitor during the
 *         initialization phase.
 */
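
/*
 * Example (illustrative only - the device id "lsi0", the socket path and
 * the emulated device type below are hypothetical choices, not
 * requirements):
 *
 *     qemu-system-x86_64 -machine x-remote,vfio-user=on,auto-shutdown=on
 *         -device lsi53c895a,id=lsi0
 *         -object x-vfio-user-server,id=vfuobj0,type=unix,
 *                 path=/tmp/vfu.sock,device=lsi0
 */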

#include "qemu/osdep.h"

#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"
#include "hw/remote/machine.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-events-misc.h"
#include "qemu/notify.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/sysemu.h"
#include "libvfio-user.h"
#include "hw/qdev-core.h"
#include "hw/pci/pci.h"
#include "qemu/timer.h"
#include "exec/memory.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/remote/vfio-user-obj.h"

#define TYPE_VFU_OBJECT "x-vfio-user-server"
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)

/**
 * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown
 * is set, it aborts the machine on error. Otherwise, it logs an
 * error message without aborting.
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    {                                                                     \
        if (vfu_object_auto_shutdown()) {                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    }                                                                     \

struct VfuObjectClass {
    ObjectClass parent_class;

    unsigned int nr_devs;
};

struct VfuObject {
    /* private */
    Object parent;

    SocketAddress *socket;

    char *device;

    Error *err;

    Notifier machine_done;

    vfu_ctx_t *vfu_ctx;

    PCIDevice *pci_dev;

    Error *unplug_blocker;

    int vfu_poll_fd;

    MSITriggerFunc *default_msi_trigger;
    MSIPrepareMessageFunc *default_msi_prepare_message;
    MSIxPrepareMessageFunc *default_msix_prepare_message;
};

static void vfu_object_init_ctx(VfuObject *o, Error **errp);

static bool vfu_object_auto_shutdown(void)
{
    bool auto_shutdown = true;
    Error *local_err = NULL;

    if (!current_machine) {
        return auto_shutdown;
    }

    auto_shutdown = object_property_get_bool(OBJECT(current_machine),
                                             "auto-shutdown",
                                             &local_err);

    /*
     * local_err would be set if no such property exists - safe to ignore.
     * Unlikely scenario as auto-shutdown is always defined for
     * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
     * TYPE_REMOTE_MACHINE.
     */
    if (local_err) {
        auto_shutdown = true;
        error_free(local_err);
    }

    return auto_shutdown;
}

static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    VfuObject *o = VFU_OBJECT(obj);

    if (o->vfu_ctx) {
        error_setg(errp, "vfu: Unable to set socket property - server busy");
        return;
    }

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    if (!visit_type_SocketAddress(v, name, &o->socket, errp)) {
        return;
    }

    if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "vfu: Unsupported socket type - %s",
                   SocketAddressType_str(o->socket->type));
        qapi_free_SocketAddress(o->socket);
        o->socket = NULL;
        return;
    }

    trace_vfu_prop("socket", o->socket->u.q_unix.path);

    vfu_object_init_ctx(o, errp);
}

static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
{
    VfuObject *o = VFU_OBJECT(obj);

    if (o->vfu_ctx) {
        error_setg(errp, "vfu: Unable to set device property - server busy");
        return;
    }

    g_free(o->device);

    o->device = g_strdup(str);

    trace_vfu_prop("device", str);

    vfu_object_init_ctx(o, errp);
}

static void vfu_object_ctx_run(void *opaque)
{
    VfuObject *o = opaque;
    const char *vfu_id;
    char *vfu_path, *pci_dev_path;
    int ret = -1;

    while (ret != 0) {
        ret = vfu_run_ctx(o->vfu_ctx);
        if (ret < 0) {
            if (errno == EINTR) {
                continue;
            } else if (errno == ENOTCONN) {
                vfu_id = object_get_canonical_path_component(OBJECT(o));
                vfu_path = object_get_canonical_path(OBJECT(o));
                g_assert(o->pci_dev);
                pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
                /* o->device is a required property and is non-NULL here */
                g_assert(o->device);
                qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
                                                  o->device, pci_dev_path);
                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
                o->vfu_poll_fd = -1;
                object_unparent(OBJECT(o));
                g_free(vfu_path);
                g_free(pci_dev_path);
                break;
            } else {
                VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
                                 o->device, strerror(errno));
                break;
            }
        }
    }
}

static void vfu_object_attach_ctx(void *opaque)
{
    VfuObject *o = opaque;
    GPollFD pfds[1];
    int ret;

    qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);

    pfds[0].fd = o->vfu_poll_fd;
    pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;

retry_attach:
    ret = vfu_attach_ctx(o->vfu_ctx);
    if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        /**
         * vfu_object_attach_ctx can block QEMU's main loop
         * during attach - the monitor and other IO
         * could be unresponsive during this time.
         */
        (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
        goto retry_attach;
    } else if (ret < 0) {
        VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
                         o->device, strerror(errno));
        return;
    }

    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    if (o->vfu_poll_fd < 0) {
        VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
        return;
    }

    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
}

static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
                                     size_t count, loff_t offset,
                                     const bool is_write)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t pci_access_width = sizeof(uint32_t);
    size_t bytes = count;
    uint32_t val = 0;
    char *ptr = buf;
    int len;

    /*
     * Writes to the BAR registers would trigger an update to the
     * global Memory and IO AddressSpaces. But the remote device
     * never uses the global AddressSpaces, therefore overlapping
     * memory regions are not a problem.
     */
    while (bytes > 0) {
        len = (bytes > pci_access_width) ? pci_access_width : bytes;
        if (is_write) {
            memcpy(&val, ptr, len);
            pci_host_config_write_common(o->pci_dev, offset,
                                         pci_config_size(o->pci_dev),
                                         val, len);
            trace_vfu_cfg_write(offset, val);
        } else {
            val = pci_host_config_read_common(o->pci_dev, offset,
                                              pci_config_size(o->pci_dev), len);
            memcpy(ptr, &val, len);
            trace_vfu_cfg_read(offset, val);
        }
        offset += len;
        ptr += len;
        bytes -= len;
    }

    return count;
}

static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    AddressSpace *dma_as = NULL;
    MemoryRegion *subregion = NULL;
    g_autofree char *name = NULL;
    struct iovec *iov = &info->iova;

    if (!info->vaddr) {
        return;
    }

    name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
                           (uint64_t)info->vaddr);

    subregion = g_new0(MemoryRegion, 1);

    memory_region_init_ram_ptr(subregion, NULL, name,
                               iov->iov_len, info->vaddr);

    dma_as = pci_device_iommu_address_space(o->pci_dev);

    memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);

    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
}

static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    AddressSpace *dma_as = NULL;
    MemoryRegion *mr = NULL;
    ram_addr_t offset;

    mr = memory_region_from_host(info->vaddr, &offset);
    if (!mr) {
        return;
    }

    dma_as = pci_device_iommu_address_space(o->pci_dev);

    memory_region_del_subregion(dma_as->root, mr);

    object_unparent(OBJECT(mr));

    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
}

static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
                            hwaddr size, const bool is_write)
{
    uint8_t *ptr = buf;
    bool release_lock = false;
    uint8_t *ram_ptr = NULL;
    MemTxResult result;
    int access_size;
    uint64_t val;

    if (memory_access_is_direct(mr, is_write)) {
        /**
         * Some devices expose a PCI expansion ROM, which could be buffer
         * based as compared to other regions which are primarily based on
         * MemoryRegionOps. memory_region_find() already checks for buffer
         * overflow, so we don't need to repeat it here.
         */
        ram_ptr = memory_region_get_ram_ptr(mr);

        if (is_write) {
            memcpy((ram_ptr + offset), buf, size);
        } else {
            memcpy(buf, (ram_ptr + offset), size);
        }

        return 0;
    }

    while (size) {
        /**
         * The read/write logic used below is similar to that in
         * flatview_read/write_continue()
         */
        release_lock = prepare_mmio_access(mr);

        access_size = memory_access_size(mr, size, offset);

        if (is_write) {
            val = ldn_he_p(ptr, access_size);

            result = memory_region_dispatch_write(mr, offset, val,
                                                  size_memop(access_size),
                                                  MEMTXATTRS_UNSPECIFIED);
        } else {
            result = memory_region_dispatch_read(mr, offset, &val,
                                                 size_memop(access_size),
                                                 MEMTXATTRS_UNSPECIFIED);

            stn_he_p(ptr, access_size, val);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        if (result != MEMTX_OK) {
            return -1;
        }

        size -= access_size;
        ptr += access_size;
        offset += access_size;
    }

    return 0;
}

static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
                                hwaddr bar_offset, char * const buf,
                                hwaddr len, const bool is_write)
{
    MemoryRegionSection section = { 0 };
    uint8_t *ptr = (uint8_t *)buf;
    MemoryRegion *section_mr = NULL;
    uint64_t section_size;
    hwaddr section_offset;
    hwaddr size = 0;

    while (len) {
        section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
                                     bar_offset, len);

        if (!section.mr) {
            warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
            return size;
        }

        section_mr = section.mr;
        section_offset = section.offset_within_region;
        section_size = int128_get64(section.size);

        if (is_write && section_mr->readonly) {
            warn_report("vfu: attempting to write to readonly region in "
                        "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
                        pci_bar, bar_offset,
                        (bar_offset + section_size));
            memory_region_unref(section_mr);
            return size;
        }

        if (vfu_object_mr_rw(section_mr, ptr, section_offset,
                             section_size, is_write)) {
            warn_report("vfu: failed to %s "
                        "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
                        is_write ? "write to" : "read from", bar_offset,
                        (bar_offset + section_size), pci_bar);
            memory_region_unref(section_mr);
            return size;
        }

        size += section_size;
        bar_offset += section_size;
        ptr += section_size;
        len -= section_size;

        memory_region_unref(section_mr);
    }

    return size;
}

/**
 * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
 *
 * To create a handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
 * define vfu_object_bar2_handler
 */
#define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                         \
    static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,        \
                                        char * const buf, size_t count,        \
                                        loff_t offset, const bool is_write)    \
    {                                                                          \
        VfuObject *o = vfu_get_private(vfu_ctx);                               \
        PCIDevice *pci_dev = o->pci_dev;                                       \
                                                                               \
        return vfu_object_bar_rw(pci_dev, BAR_NO, offset,                      \
                                 buf, count, is_write);                        \
    }                                                                          \

VFU_OBJECT_BAR_HANDLER(0)
VFU_OBJECT_BAR_HANDLER(1)
VFU_OBJECT_BAR_HANDLER(2)
VFU_OBJECT_BAR_HANDLER(3)
VFU_OBJECT_BAR_HANDLER(4)
VFU_OBJECT_BAR_HANDLER(5)
VFU_OBJECT_BAR_HANDLER(6)
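
/*
 * As an illustration, VFU_OBJECT_BAR_HANDLER(2) above expands to roughly the
 * following handler (a sketch of the preprocessor output, not code kept in
 * the tree):
 *
 *     static ssize_t vfu_object_bar2_handler(vfu_ctx_t *vfu_ctx,
 *                                            char * const buf, size_t count,
 *                                            loff_t offset,
 *                                            const bool is_write)
 *     {
 *         VfuObject *o = vfu_get_private(vfu_ctx);
 *         PCIDevice *pci_dev = o->pci_dev;
 *
 *         return vfu_object_bar_rw(pci_dev, 2, offset, buf, count, is_write);
 *     }
 */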

static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
    &vfu_object_bar0_handler,
    &vfu_object_bar1_handler,
    &vfu_object_bar2_handler,
    &vfu_object_bar3_handler,
    &vfu_object_bar4_handler,
    &vfu_object_bar5_handler,
    &vfu_object_bar6_handler,
};

/**
 * vfu_object_register_bars - Identify active BAR regions of pdev and setup
 *                            callbacks to handle read/write accesses
 */
static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
{
    int flags = VFU_REGION_FLAG_RW;
    int i;

    for (i = 0; i < PCI_NUM_REGIONS; i++) {
        if (!pdev->io_regions[i].size) {
            continue;
        }

        if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
            pdev->io_regions[i].memory->readonly) {
            flags &= ~VFU_REGION_FLAG_WRITE;
        }

        vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
                         (size_t)pdev->io_regions[i].size,
                         vfu_object_bar_handlers[i],
                         flags, NULL, 0, -1, 0);

        trace_vfu_bar_register(i, pdev->io_regions[i].addr,
                               pdev->io_regions[i].size);
    }
}

static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
{
    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
                                pci_dev->devfn);

    return pci_bdf;
}
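
/*
 * Worked example, assuming QEMU's standard PCI helper macros: for a device
 * at bus 1, slot 2, function 0, PCI_DEVFN(2, 0) is 0x10 and
 * PCI_BUILD_BDF(1, 0x10) is 0x0110. vfu_object_set_irq() below recovers the
 * same bus number and devfn with PCI_BUS_NUM() and PCI_BDF_TO_DEVFN().
 */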

static void vfu_object_set_irq(void *opaque, int pirq, int level)
{
    PCIBus *pci_bus = opaque;
    PCIDevice *pci_dev = NULL;
    vfu_ctx_t *vfu_ctx = NULL;
    int pci_bus_num, devfn;

    if (level) {
        pci_bus_num = PCI_BUS_NUM(pirq);
        devfn = PCI_BDF_TO_DEVFN(pirq);

        /*
         * pci_find_device() performs in O(1) if the device is attached
         * to the root PCI bus, whereas if the device is attached to a
         * secondary PCI bus (such as when a root port is involved),
         * finding the parent PCI bus could take O(n).
         */
        pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);

        vfu_ctx = pci_dev->irq_opaque;

        g_assert(vfu_ctx);

        vfu_irq_trigger(vfu_ctx, 0);
    }
}

static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
                                             unsigned int vector)
{
    MSIMessage msg;

    msg.address = 0;
    msg.data = vector;

    return msg;
}

static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
{
    vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;

    vfu_irq_trigger(vfu_ctx, msg.data);
}

static void vfu_object_setup_msi_cbs(VfuObject *o)
{
    o->default_msi_trigger = o->pci_dev->msi_trigger;
    o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
    o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;

    o->pci_dev->msi_trigger = vfu_object_msi_trigger;
    o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
    o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
}

static void vfu_object_restore_msi_cbs(VfuObject *o)
{
    o->pci_dev->msi_trigger = o->default_msi_trigger;
    o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
    o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
}

static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                               uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t vector;

    /* (start, count) describes the vector range [start, start + count) */
    for (vector = start; vector < start + count; vector++) {
        msix_set_mask(o->pci_dev, vector, mask);
    }
}

static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                              uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    Error *err = NULL;
    uint32_t vector;

    for (vector = start; vector < start + count; vector++) {
        msi_set_mask(o->pci_dev, vector, mask, &err);
        if (err) {
            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
                             error_get_pretty(err));
            error_free(err);
            err = NULL;
        }
    }
}

static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                     &vfu_msix_irq_state);
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                     &vfu_msi_irq_state);
    }

    if (ret < 0) {
        return ret;
    }

    vfu_object_setup_msi_cbs(o);

    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}

void vfu_object_set_bus_irq(PCIBus *pci_bus)
{
    int bus_num = pci_bus_num(pci_bus);
    int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);

    pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus,
                 max_bdf);
}

static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
{
    VfuObject *o = vfu_get_private(vfu_ctx);

    /* vfu_object_ctx_run() handles lost connection */
    if (type == VFU_RESET_LOST_CONN) {
        return 0;
    }

    qdev_reset_all(DEVICE(o->pci_dev));

    return 0;
}

/*
 * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
 * properties. It also depends on devices instantiated in QEMU. These
 * dependencies are not available during the instance_init phase of this
 * object's life-cycle. As such, the server is initialized after the
 * machine is set up. machine_init_done_notifier notifies TYPE_VFU_OBJECT
 * when the machine is set up, and the dependencies are available.
 */
static void vfu_object_machine_done(Notifier *notifier, void *data)
{
    VfuObject *o = container_of(notifier, VfuObject, machine_done);
    Error *err = NULL;

    vfu_object_init_ctx(o, &err);

    if (err) {
        error_propagate(&error_abort, err);
    }
}

/**
 * vfu_object_init_ctx: Create and initialize the libvfio-user context. Add
 *     an unplug blocker for the associated PCI device. Setup an FD handler
 *     to process incoming messages on the context's socket.
 *
 *     The socket and device properties are mandatory, and this function
 *     will not create the context without them - the setters for these
 *     properties should call this function when the property is set. The
 *     machine should also be ready when this function is invoked - this is
 *     because QEMU objects are initialized before devices, and the
 *     associated PCI device wouldn't be available at object
 *     initialization time. Until these conditions are satisfied, this
 *     function returns early without performing any task.
 */
static void vfu_object_init_ctx(VfuObject *o, Error **errp)
{
    ERRP_GUARD();
    DeviceState *dev = NULL;
    vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
    int ret;

    if (o->vfu_ctx || !o->socket || !o->device ||
            !phase_check(PHASE_MACHINE_READY)) {
        return;
    }

    if (o->err) {
        error_propagate(errp, o->err);
        o->err = NULL;
        return;
    }

    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
                                LIBVFIO_USER_FLAG_ATTACH_NB,
                                o, VFU_DEV_TYPE_PCI);
    if (o->vfu_ctx == NULL) {
        error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
        return;
    }

    dev = qdev_find_recursive(sysbus_get_default(), o->device);
    if (dev == NULL) {
        error_setg(errp, "vfu: Device %s not found", o->device);
        goto fail;
    }

    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        error_setg(errp, "vfu: %s not a PCI device", o->device);
        goto fail;
    }

    o->pci_dev = PCI_DEVICE(dev);

    object_ref(OBJECT(o->pci_dev));

    if (pci_is_express(o->pci_dev)) {
        pci_type = VFU_PCI_TYPE_EXPRESS;
    }

    ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to attach PCI device %s to context - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    error_setg(&o->unplug_blocker,
               "vfu: %s for %s must be deleted before unplugging",
               TYPE_VFU_OBJECT, o->device);
    qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);

    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
                           pci_config_size(o->pci_dev), &vfu_object_cfg_access,
                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
                           NULL, 0, -1, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to setup config space handlers for %s - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
                   o->device);
        goto fail;
    }

    vfu_object_register_bars(o->vfu_ctx, o->pci_dev);

    ret = vfu_object_setup_irqs(o, o->pci_dev);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup interrupts for %s",
                   o->device);
        goto fail;
    }

    ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup reset callback");
        goto fail;
    }

    ret = vfu_realize_ctx(o->vfu_ctx);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to realize device %s - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    if (o->vfu_poll_fd < 0) {
        error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
        goto fail;
    }

    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);

    return;

fail:
    vfu_destroy_ctx(o->vfu_ctx);
    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }
    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }
    o->vfu_ctx = NULL;
}

static void vfu_object_init(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs++;

    if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
        error_setg(&o->err, "vfu: %s only compatible with %s machine",
                   TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
        return;
    }

    if (!phase_check(PHASE_MACHINE_READY)) {
        o->machine_done.notify = vfu_object_machine_done;
        qemu_add_machine_init_done_notifier(&o->machine_done);
    }

    o->vfu_poll_fd = -1;
}

static void vfu_object_finalize(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs--;

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    if (o->vfu_poll_fd != -1) {
        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
        o->vfu_poll_fd = -1;
    }

    if (o->vfu_ctx) {
        vfu_destroy_ctx(o->vfu_ctx);
        o->vfu_ctx = NULL;
    }

    g_free(o->device);

    o->device = NULL;

    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }

    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }

    if (!k->nr_devs && vfu_object_auto_shutdown()) {
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
    }

    if (o->machine_done.notify) {
        qemu_remove_machine_init_done_notifier(&o->machine_done);
        o->machine_done.notify = NULL;
    }
}

static void vfu_object_class_init(ObjectClass *klass, void *data)
{
    VfuObjectClass *k = VFU_OBJECT_CLASS(klass);

    k->nr_devs = 0;

    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfu_object_set_socket, NULL, NULL);
    object_class_property_set_description(klass, "socket",
                                          "SocketAddress "
                                          "(ex: type=unix,path=/tmp/sock). "
                                          "Only UNIX is presently supported");
    object_class_property_add_str(klass, "device", NULL,
                                  vfu_object_set_device);
    object_class_property_set_description(klass, "device",
                                          "device ID - only PCI devices "
                                          "are presently supported");
}
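
/*
 * For reference only, a runtime QMP invocation to create this object might
 * look like the following sketch (the id, socket path and device id are
 * hypothetical, and the nested "socket" form assumes the QAPI SocketAddress
 * layout accepted by the "socket" property registered above):
 *
 *     {"execute": "object-add",
 *      "arguments": {"qom-type": "x-vfio-user-server", "id": "vfuobj0",
 *                    "socket": {"type": "unix", "path": "/tmp/vfu.sock"},
 *                    "device": "lsi0"}}
 */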

static const TypeInfo vfu_object_info = {
    .name = TYPE_VFU_OBJECT,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(VfuObject),
    .instance_init = vfu_object_init,
    .instance_finalize = vfu_object_finalize,
    .class_size = sizeof(VfuObjectClass),
    .class_init = vfu_object_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void vfu_register_types(void)
{
    type_register_static(&vfu_object_info);
}

type_init(vfu_register_types);