qemu/hw/misc/ivshmem.c
<<
>>
Prefs
   1/*
   2 * Inter-VM Shared Memory PCI device.
   3 *
   4 * Author:
   5 *      Cam Macdonell <cam@cs.ualberta.ca>
   6 *
   7 * Based On: cirrus_vga.c
   8 *          Copyright (c) 2004 Fabrice Bellard
   9 *          Copyright (c) 2004 Makoto Suzuki (suzu)
  10 *
  11 *      and rtl8139.c
  12 *          Copyright (c) 2006 Igor Kovalenko
  13 *
  14 * This code is licensed under the GNU GPL v2.
  15 *
  16 * Contributions after 2012-01-13 are licensed under the terms of the
  17 * GNU GPL, version 2 or (at your option) any later version.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/units.h"
  22#include "qapi/error.h"
  23#include "qemu/cutils.h"
  24#include "hw/pci/pci.h"
  25#include "hw/qdev-properties.h"
  26#include "hw/qdev-properties-system.h"
  27#include "hw/pci/msi.h"
  28#include "hw/pci/msix.h"
  29#include "sysemu/kvm.h"
  30#include "migration/blocker.h"
  31#include "migration/vmstate.h"
  32#include "qemu/error-report.h"
  33#include "qemu/event_notifier.h"
  34#include "qemu/module.h"
  35#include "qom/object_interfaces.h"
  36#include "chardev/char-fe.h"
  37#include "sysemu/hostmem.h"
  38#include "qapi/visitor.h"
  39
  40#include "hw/misc/ivshmem.h"
  41#include "qom/object.h"
  42
  43#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
  44#define PCI_DEVICE_ID_IVSHMEM   0x1110
  45
  46#define IVSHMEM_MAX_PEERS UINT16_MAX
  47#define IVSHMEM_IOEVENTFD   0
  48#define IVSHMEM_MSI     1
  49
  50#define IVSHMEM_REG_BAR_SIZE 0x100
  51
  52#define IVSHMEM_DEBUG 0
  53#define IVSHMEM_DPRINTF(fmt, ...)                       \
  54    do {                                                \
  55        if (IVSHMEM_DEBUG) {                            \
  56            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
  57        }                                               \
  58    } while (0)
  59
  60#define TYPE_IVSHMEM_COMMON "ivshmem-common"
  61typedef struct IVShmemState IVShmemState;
  62DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_COMMON,
  63                         TYPE_IVSHMEM_COMMON)
  64
  65#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
  66DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_PLAIN,
  67                         TYPE_IVSHMEM_PLAIN)
  68
  69#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
  70DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_DOORBELL,
  71                         TYPE_IVSHMEM_DOORBELL)
  72
  73#define TYPE_IVSHMEM "ivshmem"
  74DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM,
  75                         TYPE_IVSHMEM)
  76
  77typedef struct Peer {
  78    int nb_eventfds;
  79    EventNotifier *eventfds;
  80} Peer;
  81
  82typedef struct MSIVector {
  83    PCIDevice *pdev;
  84    int virq;
  85    bool unmasked;
  86} MSIVector;
  87
  88struct IVShmemState {
  89    /*< private >*/
  90    PCIDevice parent_obj;
  91    /*< public >*/
  92
  93    uint32_t features;
  94
  95    /* exactly one of these two may be set */
  96    HostMemoryBackend *hostmem; /* with interrupts */
  97    CharBackend server_chr; /* without interrupts */
  98
  99    /* registers */
 100    uint32_t intrmask;
 101    uint32_t intrstatus;
 102    int vm_id;
 103
 104    /* BARs */
 105    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
 106    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
 107    MemoryRegion server_bar2;   /* used with server_chr */
 108
 109    /* interrupt support */
 110    Peer *peers;
 111    int nb_peers;               /* space in @peers[] */
 112    uint32_t vectors;
 113    MSIVector *msi_vectors;
 114    uint64_t msg_buf;           /* buffer for receiving server messages */
 115    int msg_buffered_bytes;     /* #bytes in @msg_buf */
 116
 117    /* migration stuff */
 118    OnOffAuto master;
 119    Error *migration_blocker;
 120};
 121
 122/* registers for the Inter-VM shared memory device */
 123enum ivshmem_registers {
 124    INTRMASK = 0,
 125    INTRSTATUS = 4,
 126    IVPOSITION = 8,
 127    DOORBELL = 12,
 128};
 129
 130static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
 131                                                    unsigned int feature) {
 132    return (ivs->features & (1 << feature));
 133}
 134
 135static inline bool ivshmem_is_master(IVShmemState *s)
 136{
 137    assert(s->master != ON_OFF_AUTO_AUTO);
 138    return s->master == ON_OFF_AUTO_ON;
 139}
 140
 141static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
 142{
 143    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
 144
 145    s->intrmask = val;
 146}
 147
 148static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
 149{
 150    uint32_t ret = s->intrmask;
 151
 152    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
 153    return ret;
 154}
 155
 156static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
 157{
 158    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
 159
 160    s->intrstatus = val;
 161}
 162
 163static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
 164{
 165    uint32_t ret = s->intrstatus;
 166
 167    /* reading ISR clears all interrupts */
 168    s->intrstatus = 0;
 169    return ret;
 170}
 171
 172static void ivshmem_io_write(void *opaque, hwaddr addr,
 173                             uint64_t val, unsigned size)
 174{
 175    IVShmemState *s = opaque;
 176
 177    uint16_t dest = val >> 16;
 178    uint16_t vector = val & 0xff;
 179
 180    addr &= 0xfc;
 181
 182    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
 183    switch (addr)
 184    {
 185        case INTRMASK:
 186            ivshmem_IntrMask_write(s, val);
 187            break;
 188
 189        case INTRSTATUS:
 190            ivshmem_IntrStatus_write(s, val);
 191            break;
 192
 193        case DOORBELL:
 194            /* check that dest VM ID is reasonable */
 195            if (dest >= s->nb_peers) {
 196                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
 197                break;
 198            }
 199
 200            /* check doorbell range */
 201            if (vector < s->peers[dest].nb_eventfds) {
 202                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
 203                event_notifier_set(&s->peers[dest].eventfds[vector]);
 204            } else {
 205                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
 206                                vector, dest);
 207            }
 208            break;
 209        default:
 210            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
 211    }
 212}
 213
 214static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
 215                                unsigned size)
 216{
 217
 218    IVShmemState *s = opaque;
 219    uint32_t ret;
 220
 221    switch (addr)
 222    {
 223        case INTRMASK:
 224            ret = ivshmem_IntrMask_read(s);
 225            break;
 226
 227        case INTRSTATUS:
 228            ret = ivshmem_IntrStatus_read(s);
 229            break;
 230
 231        case IVPOSITION:
 232            ret = s->vm_id;
 233            break;
 234
 235        default:
 236            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
 237            ret = 0;
 238    }
 239
 240    return ret;
 241}
 242
 243static const MemoryRegionOps ivshmem_mmio_ops = {
 244    .read = ivshmem_io_read,
 245    .write = ivshmem_io_write,
 246    .endianness = DEVICE_NATIVE_ENDIAN,
 247    .impl = {
 248        .min_access_size = 4,
 249        .max_access_size = 4,
 250    },
 251};
 252
 253static void ivshmem_vector_notify(void *opaque)
 254{
 255    MSIVector *entry = opaque;
 256    PCIDevice *pdev = entry->pdev;
 257    IVShmemState *s = IVSHMEM_COMMON(pdev);
 258    int vector = entry - s->msi_vectors;
 259    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 260
 261    if (!event_notifier_test_and_clear(n)) {
 262        return;
 263    }
 264
 265    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
 266    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 267        if (msix_enabled(pdev)) {
 268            msix_notify(pdev, vector);
 269        }
 270    } else {
 271        ivshmem_IntrStatus_write(s, 1);
 272    }
 273}
 274
 275static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
 276                                 MSIMessage msg)
 277{
 278    IVShmemState *s = IVSHMEM_COMMON(dev);
 279    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 280    MSIVector *v = &s->msi_vectors[vector];
 281    int ret;
 282
 283    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
 284    if (!v->pdev) {
 285        error_report("ivshmem: vector %d route does not exist", vector);
 286        return -EINVAL;
 287    }
 288    assert(!v->unmasked);
 289
 290    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
 291    if (ret < 0) {
 292        return ret;
 293    }
 294    kvm_irqchip_commit_routes(kvm_state);
 295
 296    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
 297    if (ret < 0) {
 298        return ret;
 299    }
 300    v->unmasked = true;
 301
 302    return 0;
 303}
 304
 305static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
 306{
 307    IVShmemState *s = IVSHMEM_COMMON(dev);
 308    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 309    MSIVector *v = &s->msi_vectors[vector];
 310    int ret;
 311
 312    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
 313    if (!v->pdev) {
 314        error_report("ivshmem: vector %d route does not exist", vector);
 315        return;
 316    }
 317    assert(v->unmasked);
 318
 319    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
 320    if (ret < 0) {
 321        error_report("remove_irqfd_notifier_gsi failed");
 322        return;
 323    }
 324    v->unmasked = false;
 325}
 326
 327static void ivshmem_vector_poll(PCIDevice *dev,
 328                                unsigned int vector_start,
 329                                unsigned int vector_end)
 330{
 331    IVShmemState *s = IVSHMEM_COMMON(dev);
 332    unsigned int vector;
 333
 334    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
 335
 336    vector_end = MIN(vector_end, s->vectors);
 337
 338    for (vector = vector_start; vector < vector_end; vector++) {
 339        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
 340
 341        if (!msix_is_masked(dev, vector)) {
 342            continue;
 343        }
 344
 345        if (event_notifier_test_and_clear(notifier)) {
 346            msix_set_pending(dev, vector);
 347        }
 348    }
 349}
 350
 351static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
 352                                 int vector)
 353{
 354    int eventfd = event_notifier_get_fd(n);
 355
 356    assert(!s->msi_vectors[vector].pdev);
 357    s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 358
 359    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
 360                        NULL, &s->msi_vectors[vector]);
 361}
 362
 363static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
 364{
 365    memory_region_add_eventfd(&s->ivshmem_mmio,
 366                              DOORBELL,
 367                              4,
 368                              true,
 369                              (posn << 16) | i,
 370                              &s->peers[posn].eventfds[i]);
 371}
 372
 373static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
 374{
 375    memory_region_del_eventfd(&s->ivshmem_mmio,
 376                              DOORBELL,
 377                              4,
 378                              true,
 379                              (posn << 16) | i,
 380                              &s->peers[posn].eventfds[i]);
 381}
 382
 383static void close_peer_eventfds(IVShmemState *s, int posn)
 384{
 385    int i, n;
 386
 387    assert(posn >= 0 && posn < s->nb_peers);
 388    n = s->peers[posn].nb_eventfds;
 389
 390    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 391        memory_region_transaction_begin();
 392        for (i = 0; i < n; i++) {
 393            ivshmem_del_eventfd(s, posn, i);
 394        }
 395        memory_region_transaction_commit();
 396    }
 397
 398    for (i = 0; i < n; i++) {
 399        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
 400    }
 401
 402    g_free(s->peers[posn].eventfds);
 403    s->peers[posn].nb_eventfds = 0;
 404}
 405
 406static void resize_peers(IVShmemState *s, int nb_peers)
 407{
 408    int old_nb_peers = s->nb_peers;
 409    int i;
 410
 411    assert(nb_peers > old_nb_peers);
 412    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
 413
 414    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
 415    s->nb_peers = nb_peers;
 416
 417    for (i = old_nb_peers; i < nb_peers; i++) {
 418        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
 419        s->peers[i].nb_eventfds = 0;
 420    }
 421}
 422
 423static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
 424                                     Error **errp)
 425{
 426    PCIDevice *pdev = PCI_DEVICE(s);
 427    int ret;
 428
 429    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
 430    assert(!s->msi_vectors[vector].pdev);
 431
 432    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
 433    if (ret < 0) {
 434        error_setg(errp, "kvm_irqchip_add_msi_route failed");
 435        return;
 436    }
 437
 438    s->msi_vectors[vector].virq = ret;
 439    s->msi_vectors[vector].pdev = pdev;
 440}
 441
 442static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
 443{
 444    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 445    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
 446        ivshmem_has_feature(s, IVSHMEM_MSI);
 447    PCIDevice *pdev = PCI_DEVICE(s);
 448    Error *err = NULL;
 449
 450    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
 451
 452    if (!with_irqfd) {
 453        IVSHMEM_DPRINTF("with eventfd\n");
 454        watch_vector_notifier(s, n, vector);
 455    } else if (msix_enabled(pdev)) {
 456        IVSHMEM_DPRINTF("with irqfd\n");
 457        ivshmem_add_kvm_msi_virq(s, vector, &err);
 458        if (err) {
 459            error_propagate(errp, err);
 460            return;
 461        }
 462
 463        if (!msix_is_masked(pdev, vector)) {
 464            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
 465                                               s->msi_vectors[vector].virq);
 466            /* TODO handle error */
 467        }
 468    } else {
 469        /* it will be delayed until msix is enabled, in write_config */
 470        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
 471    }
 472}
 473
 474static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 475{
 476    Error *local_err = NULL;
 477    struct stat buf;
 478    size_t size;
 479
 480    if (s->ivshmem_bar2) {
 481        error_setg(errp, "server sent unexpected shared memory message");
 482        close(fd);
 483        return;
 484    }
 485
 486    if (fstat(fd, &buf) < 0) {
 487        error_setg_errno(errp, errno,
 488            "can't determine size of shared memory sent by server");
 489        close(fd);
 490        return;
 491    }
 492
 493    size = buf.st_size;
 494
 495    /* mmap the region and map into the BAR2 */
 496    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2",
 497                                   size, RAM_SHARED, fd, 0, &local_err);
 498    if (local_err) {
 499        error_propagate(errp, local_err);
 500        return;
 501    }
 502
 503    s->ivshmem_bar2 = &s->server_bar2;
 504}
 505
 506static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
 507                                   Error **errp)
 508{
 509    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
 510    if (posn >= s->nb_peers || posn == s->vm_id) {
 511        error_setg(errp, "invalid peer %d", posn);
 512        return;
 513    }
 514    close_peer_eventfds(s, posn);
 515}
 516
 517static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
 518                                Error **errp)
 519{
 520    Peer *peer = &s->peers[posn];
 521    int vector;
 522
 523    /*
 524     * The N-th connect message for this peer comes with the file
 525     * descriptor for vector N-1.  Count messages to find the vector.
 526     */
 527    if (peer->nb_eventfds >= s->vectors) {
 528        error_setg(errp, "Too many eventfd received, device has %d vectors",
 529                   s->vectors);
 530        close(fd);
 531        return;
 532    }
 533    vector = peer->nb_eventfds++;
 534
 535    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
 536    event_notifier_init_fd(&peer->eventfds[vector], fd);
 537    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
 538
 539    if (posn == s->vm_id) {
 540        setup_interrupt(s, vector, errp);
 541        /* TODO do we need to handle the error? */
 542    }
 543
 544    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 545        ivshmem_add_eventfd(s, posn, vector);
 546    }
 547}
 548
 549static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 550{
 551    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
 552
 553    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
 554        error_setg(errp, "server sent invalid message %" PRId64, msg);
 555        close(fd);
 556        return;
 557    }
 558
 559    if (msg == -1) {
 560        process_msg_shmem(s, fd, errp);
 561        return;
 562    }
 563
 564    if (msg >= s->nb_peers) {
 565        resize_peers(s, msg + 1);
 566    }
 567
 568    if (fd >= 0) {
 569        process_msg_connect(s, msg, fd, errp);
 570    } else {
 571        process_msg_disconnect(s, msg, errp);
 572    }
 573}
 574
 575static int ivshmem_can_receive(void *opaque)
 576{
 577    IVShmemState *s = opaque;
 578
 579    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
 580    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
 581}
 582
 583static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 584{
 585    IVShmemState *s = opaque;
 586    Error *err = NULL;
 587    int fd;
 588    int64_t msg;
 589
 590    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
 591    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
 592    s->msg_buffered_bytes += size;
 593    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
 594        return;
 595    }
 596    msg = le64_to_cpu(s->msg_buf);
 597    s->msg_buffered_bytes = 0;
 598
 599    fd = qemu_chr_fe_get_msgfd(&s->server_chr);
 600
 601    process_msg(s, msg, fd, &err);
 602    if (err) {
 603        error_report_err(err);
 604    }
 605}
 606
 607static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
 608{
 609    int64_t msg;
 610    int n, ret;
 611
 612    n = 0;
 613    do {
 614        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
 615                                   sizeof(msg) - n);
 616        if (ret < 0) {
 617            if (ret == -EINTR) {
 618                continue;
 619            }
 620            error_setg_errno(errp, -ret, "read from server failed");
 621            return INT64_MIN;
 622        }
 623        n += ret;
 624    } while (n < sizeof(msg));
 625
 626    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
 627    return le64_to_cpu(msg);
 628}
 629
 630static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
 631{
 632    Error *err = NULL;
 633    int64_t msg;
 634    int fd;
 635
 636    msg = ivshmem_recv_msg(s, &fd, &err);
 637    if (err) {
 638        error_propagate(errp, err);
 639        return;
 640    }
 641    if (msg != IVSHMEM_PROTOCOL_VERSION) {
 642        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
 643                   msg, IVSHMEM_PROTOCOL_VERSION);
 644        return;
 645    }
 646    if (fd != -1) {
 647        error_setg(errp, "server sent invalid version message");
 648        return;
 649    }
 650
 651    /*
 652     * ivshmem-server sends the remaining initial messages in a fixed
 653     * order, but the device has always accepted them in any order.
 654     * Stay as compatible as practical, just in case people use
 655     * servers that behave differently.
 656     */
 657
 658    /*
 659     * ivshmem_device_spec.txt has always required the ID message
 660     * right here, and ivshmem-server has always complied.  However,
 661     * older versions of the device accepted it out of order, but
 662     * broke when an interrupt setup message arrived before it.
 663     */
 664    msg = ivshmem_recv_msg(s, &fd, &err);
 665    if (err) {
 666        error_propagate(errp, err);
 667        return;
 668    }
 669    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
 670        error_setg(errp, "server sent invalid ID message");
 671        return;
 672    }
 673    s->vm_id = msg;
 674
 675    /*
 676     * Receive more messages until we got shared memory.
 677     */
 678    do {
 679        msg = ivshmem_recv_msg(s, &fd, &err);
 680        if (err) {
 681            error_propagate(errp, err);
 682            return;
 683        }
 684        process_msg(s, msg, fd, &err);
 685        if (err) {
 686            error_propagate(errp, err);
 687            return;
 688        }
 689    } while (msg != -1);
 690
 691    /*
 692     * This function must either map the shared memory or fail.  The
 693     * loop above ensures that: it terminates normally only after it
 694     * successfully processed the server's shared memory message.
 695     * Assert that actually mapped the shared memory:
 696     */
 697    assert(s->ivshmem_bar2);
 698}
 699
 700/* Select the MSI-X vectors used by device.
 701 * ivshmem maps events to vectors statically, so
 702 * we just enable all vectors on init and after reset. */
 703static void ivshmem_msix_vector_use(IVShmemState *s)
 704{
 705    PCIDevice *d = PCI_DEVICE(s);
 706    int i;
 707
 708    for (i = 0; i < s->vectors; i++) {
 709        msix_vector_use(d, i);
 710    }
 711}
 712
 713static void ivshmem_disable_irqfd(IVShmemState *s);
 714
 715static void ivshmem_reset(DeviceState *d)
 716{
 717    IVShmemState *s = IVSHMEM_COMMON(d);
 718
 719    ivshmem_disable_irqfd(s);
 720
 721    s->intrstatus = 0;
 722    s->intrmask = 0;
 723    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 724        ivshmem_msix_vector_use(s);
 725    }
 726}
 727
 728static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
 729{
 730    /* allocate QEMU callback data for receiving interrupts */
 731    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 732
 733    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 734        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
 735            return -1;
 736        }
 737
 738        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 739        ivshmem_msix_vector_use(s);
 740    }
 741
 742    return 0;
 743}
 744
 745static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
 746{
 747    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
 748
 749    if (s->msi_vectors[vector].pdev == NULL) {
 750        return;
 751    }
 752
 753    /* it was cleaned when masked in the frontend. */
 754    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
 755
 756    s->msi_vectors[vector].pdev = NULL;
 757}
 758
 759static void ivshmem_enable_irqfd(IVShmemState *s)
 760{
 761    PCIDevice *pdev = PCI_DEVICE(s);
 762    int i;
 763
 764    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 765        Error *err = NULL;
 766
 767        ivshmem_add_kvm_msi_virq(s, i, &err);
 768        if (err) {
 769            error_report_err(err);
 770            goto undo;
 771        }
 772    }
 773
 774    if (msix_set_vector_notifiers(pdev,
 775                                  ivshmem_vector_unmask,
 776                                  ivshmem_vector_mask,
 777                                  ivshmem_vector_poll)) {
 778        error_report("ivshmem: msix_set_vector_notifiers failed");
 779        goto undo;
 780    }
 781    return;
 782
 783undo:
 784    while (--i >= 0) {
 785        ivshmem_remove_kvm_msi_virq(s, i);
 786    }
 787}
 788
 789static void ivshmem_disable_irqfd(IVShmemState *s)
 790{
 791    PCIDevice *pdev = PCI_DEVICE(s);
 792    int i;
 793
 794    if (!pdev->msix_vector_use_notifier) {
 795        return;
 796    }
 797
 798    msix_unset_vector_notifiers(pdev);
 799
 800    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 801        /*
 802         * MSI-X is already disabled here so msix_unset_vector_notifiers()
 803         * didn't call our release notifier.  Do it now to keep our masks and
 804         * unmasks balanced.
 805         */
 806        if (s->msi_vectors[i].unmasked) {
 807            ivshmem_vector_mask(pdev, i);
 808        }
 809        ivshmem_remove_kvm_msi_virq(s, i);
 810    }
 811
 812}
 813
 814static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
 815                                 uint32_t val, int len)
 816{
 817    IVShmemState *s = IVSHMEM_COMMON(pdev);
 818    int is_enabled, was_enabled = msix_enabled(pdev);
 819
 820    pci_default_write_config(pdev, address, val, len);
 821    is_enabled = msix_enabled(pdev);
 822
 823    if (kvm_msi_via_irqfd_enabled()) {
 824        if (!was_enabled && is_enabled) {
 825            ivshmem_enable_irqfd(s);
 826        } else if (was_enabled && !is_enabled) {
 827            ivshmem_disable_irqfd(s);
 828        }
 829    }
 830}
 831
 832static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
 833{
 834    IVShmemState *s = IVSHMEM_COMMON(dev);
 835    Error *err = NULL;
 836    uint8_t *pci_conf;
 837
 838    /* IRQFD requires MSI */
 839    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
 840        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
 841        error_setg(errp, "ioeventfd/irqfd requires MSI");
 842        return;
 843    }
 844
 845    pci_conf = dev->config;
 846    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
 847
 848    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
 849                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
 850
 851    /* region for registers*/
 852    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
 853                     &s->ivshmem_mmio);
 854
 855    if (s->hostmem != NULL) {
 856        IVSHMEM_DPRINTF("using hostmem\n");
 857
 858        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
 859        host_memory_backend_set_mapped(s->hostmem, true);
 860    } else {
 861        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
 862        assert(chr);
 863
 864        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
 865                        chr->filename);
 866
 867        /* we allocate enough space for 16 peers and grow as needed */
 868        resize_peers(s, 16);
 869
 870        /*
 871         * Receive setup messages from server synchronously.
 872         * Older versions did it asynchronously, but that creates a
 873         * number of entertaining race conditions.
 874         */
 875        ivshmem_recv_setup(s, &err);
 876        if (err) {
 877            error_propagate(errp, err);
 878            return;
 879        }
 880
 881        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
 882            error_setg(errp,
 883                       "master must connect to the server before any peers");
 884            return;
 885        }
 886
 887        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
 888                                 ivshmem_read, NULL, NULL, s, NULL, true);
 889
 890        if (ivshmem_setup_interrupts(s, errp) < 0) {
 891            error_prepend(errp, "Failed to initialize interrupts: ");
 892            return;
 893        }
 894    }
 895
 896    if (s->master == ON_OFF_AUTO_AUTO) {
 897        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 898    }
 899
 900    if (!ivshmem_is_master(s)) {
 901        error_setg(&s->migration_blocker,
 902                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
 903        if (migrate_add_blocker(s->migration_blocker, errp) < 0) {
 904            error_free(s->migration_blocker);
 905            return;
 906        }
 907    }
 908
 909    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
 910    pci_register_bar(PCI_DEVICE(s), 2,
 911                     PCI_BASE_ADDRESS_SPACE_MEMORY |
 912                     PCI_BASE_ADDRESS_MEM_PREFETCH |
 913                     PCI_BASE_ADDRESS_MEM_TYPE_64,
 914                     s->ivshmem_bar2);
 915}
 916
 917static void ivshmem_exit(PCIDevice *dev)
 918{
 919    IVShmemState *s = IVSHMEM_COMMON(dev);
 920    int i;
 921
 922    if (s->migration_blocker) {
 923        migrate_del_blocker(s->migration_blocker);
 924        error_free(s->migration_blocker);
 925    }
 926
 927    if (memory_region_is_mapped(s->ivshmem_bar2)) {
 928        if (!s->hostmem) {
 929            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
 930            int fd;
 931
 932            if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
 933                error_report("Failed to munmap shared memory %s",
 934                             strerror(errno));
 935            }
 936
 937            fd = memory_region_get_fd(s->ivshmem_bar2);
 938            close(fd);
 939        }
 940
 941        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
 942    }
 943
 944    if (s->hostmem) {
 945        host_memory_backend_set_mapped(s->hostmem, false);
 946    }
 947
 948    if (s->peers) {
 949        for (i = 0; i < s->nb_peers; i++) {
 950            close_peer_eventfds(s, i);
 951        }
 952        g_free(s->peers);
 953    }
 954
 955    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 956        msix_uninit_exclusive_bar(dev);
 957    }
 958
 959    g_free(s->msi_vectors);
 960}
 961
 962static int ivshmem_pre_load(void *opaque)
 963{
 964    IVShmemState *s = opaque;
 965
 966    if (!ivshmem_is_master(s)) {
 967        error_report("'peer' devices are not migratable");
 968        return -EINVAL;
 969    }
 970
 971    return 0;
 972}
 973
 974static int ivshmem_post_load(void *opaque, int version_id)
 975{
 976    IVShmemState *s = opaque;
 977
 978    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 979        ivshmem_msix_vector_use(s);
 980    }
 981    return 0;
 982}
 983
 984static void ivshmem_common_class_init(ObjectClass *klass, void *data)
 985{
 986    DeviceClass *dc = DEVICE_CLASS(klass);
 987    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 988
 989    k->realize = ivshmem_common_realize;
 990    k->exit = ivshmem_exit;
 991    k->config_write = ivshmem_write_config;
 992    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
 993    k->device_id = PCI_DEVICE_ID_IVSHMEM;
 994    k->class_id = PCI_CLASS_MEMORY_RAM;
 995    k->revision = 1;
 996    dc->reset = ivshmem_reset;
 997    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 998    dc->desc = "Inter-VM shared memory";
 999}
1000
1001static const TypeInfo ivshmem_common_info = {
1002    .name          = TYPE_IVSHMEM_COMMON,
1003    .parent        = TYPE_PCI_DEVICE,
1004    .instance_size = sizeof(IVShmemState),
1005    .abstract      = true,
1006    .class_init    = ivshmem_common_class_init,
1007    .interfaces = (InterfaceInfo[]) {
1008        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1009        { },
1010    },
1011};
1012
1013static const VMStateDescription ivshmem_plain_vmsd = {
1014    .name = TYPE_IVSHMEM_PLAIN,
1015    .version_id = 0,
1016    .minimum_version_id = 0,
1017    .pre_load = ivshmem_pre_load,
1018    .post_load = ivshmem_post_load,
1019    .fields = (VMStateField[]) {
1020        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1021        VMSTATE_UINT32(intrstatus, IVShmemState),
1022        VMSTATE_UINT32(intrmask, IVShmemState),
1023        VMSTATE_END_OF_LIST()
1024    },
1025};
1026
1027static Property ivshmem_plain_properties[] = {
1028    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1029    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
1030                     HostMemoryBackend *),
1031    DEFINE_PROP_END_OF_LIST(),
1032};
1033
1034static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
1035{
1036    IVShmemState *s = IVSHMEM_COMMON(dev);
1037
1038    if (!s->hostmem) {
1039        error_setg(errp, "You must specify a 'memdev'");
1040        return;
1041    } else if (host_memory_backend_is_mapped(s->hostmem)) {
1042        error_setg(errp, "can't use already busy memdev: %s",
1043                   object_get_canonical_path_component(OBJECT(s->hostmem)));
1044        return;
1045    }
1046
1047    ivshmem_common_realize(dev, errp);
1048}
1049
1050static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1051{
1052    DeviceClass *dc = DEVICE_CLASS(klass);
1053    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1054
1055    k->realize = ivshmem_plain_realize;
1056    device_class_set_props(dc, ivshmem_plain_properties);
1057    dc->vmsd = &ivshmem_plain_vmsd;
1058}
1059
1060static const TypeInfo ivshmem_plain_info = {
1061    .name          = TYPE_IVSHMEM_PLAIN,
1062    .parent        = TYPE_IVSHMEM_COMMON,
1063    .instance_size = sizeof(IVShmemState),
1064    .class_init    = ivshmem_plain_class_init,
1065};
1066
1067static const VMStateDescription ivshmem_doorbell_vmsd = {
1068    .name = TYPE_IVSHMEM_DOORBELL,
1069    .version_id = 0,
1070    .minimum_version_id = 0,
1071    .pre_load = ivshmem_pre_load,
1072    .post_load = ivshmem_post_load,
1073    .fields = (VMStateField[]) {
1074        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1075        VMSTATE_MSIX(parent_obj, IVShmemState),
1076        VMSTATE_UINT32(intrstatus, IVShmemState),
1077        VMSTATE_UINT32(intrmask, IVShmemState),
1078        VMSTATE_END_OF_LIST()
1079    },
1080};
1081
1082static Property ivshmem_doorbell_properties[] = {
1083    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
1084    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
1085    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
1086                    true),
1087    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1088    DEFINE_PROP_END_OF_LIST(),
1089};
1090
1091static void ivshmem_doorbell_init(Object *obj)
1092{
1093    IVShmemState *s = IVSHMEM_DOORBELL(obj);
1094
1095    s->features |= (1 << IVSHMEM_MSI);
1096}
1097
1098static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
1099{
1100    IVShmemState *s = IVSHMEM_COMMON(dev);
1101
1102    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
1103        error_setg(errp, "You must specify a 'chardev'");
1104        return;
1105    }
1106
1107    ivshmem_common_realize(dev, errp);
1108}
1109
1110static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1111{
1112    DeviceClass *dc = DEVICE_CLASS(klass);
1113    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1114
1115    k->realize = ivshmem_doorbell_realize;
1116    device_class_set_props(dc, ivshmem_doorbell_properties);
1117    dc->vmsd = &ivshmem_doorbell_vmsd;
1118}
1119
1120static const TypeInfo ivshmem_doorbell_info = {
1121    .name          = TYPE_IVSHMEM_DOORBELL,
1122    .parent        = TYPE_IVSHMEM_COMMON,
1123    .instance_size = sizeof(IVShmemState),
1124    .instance_init = ivshmem_doorbell_init,
1125    .class_init    = ivshmem_doorbell_class_init,
1126};
1127
1128static void ivshmem_register_types(void)
1129{
1130    type_register_static(&ivshmem_common_info);
1131    type_register_static(&ivshmem_plain_info);
1132    type_register_static(&ivshmem_doorbell_info);
1133}
1134
1135type_init(ivshmem_register_types)
1136