qemu/hw/misc/ivshmem.c
<<
>>
Prefs
   1/*
   2 * Inter-VM Shared Memory PCI device.
   3 *
   4 * Author:
   5 *      Cam Macdonell <cam@cs.ualberta.ca>
   6 *
   7 * Based On: cirrus_vga.c
   8 *          Copyright (c) 2004 Fabrice Bellard
   9 *          Copyright (c) 2004 Makoto Suzuki (suzu)
  10 *
  11 *      and rtl8139.c
  12 *          Copyright (c) 2006 Igor Kovalenko
  13 *
  14 * This code is licensed under the GNU GPL v2.
  15 *
  16 * Contributions after 2012-01-13 are licensed under the terms of the
  17 * GNU GPL, version 2 or (at your option) any later version.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/units.h"
  22#include "qapi/error.h"
  23#include "qemu/cutils.h"
  24#include "hw/hw.h"
  25#include "hw/pci/pci.h"
  26#include "hw/pci/msi.h"
  27#include "hw/pci/msix.h"
  28#include "sysemu/kvm.h"
  29#include "migration/blocker.h"
  30#include "qemu/error-report.h"
  31#include "qemu/event_notifier.h"
  32#include "qemu/module.h"
  33#include "qom/object_interfaces.h"
  34#include "chardev/char-fe.h"
  35#include "sysemu/hostmem.h"
  36#include "sysemu/qtest.h"
  37#include "qapi/visitor.h"
  38
  39#include "hw/misc/ivshmem.h"
  40
/* PCI identity of the device: vendor and device ID put into config space. */
#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM   0x1110

/* Largest peer ID accepted in a server message (see process_msg()). */
#define IVSHMEM_MAX_PEERS UINT16_MAX
/* Bit numbers within IVShmemState.features, see ivshmem_has_feature(). */
#define IVSHMEM_IOEVENTFD   0
#define IVSHMEM_MSI     1

/* Size of the register region that is registered as BAR 0. */
#define IVSHMEM_REG_BAR_SIZE 0x100

/*
 * Debug output: set IVSHMEM_DEBUG to non-zero to make IVSHMEM_DPRINTF()
 * print its arguments, prefixed with "IVSHMEM: ", via printf().
 */
#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...)                       \
    do {                                                \
        if (IVSHMEM_DEBUG) {                            \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
        }                                               \
    } while (0)

/*
 * Type names and checked-cast macros.  Every cast macro yields an
 * IVShmemState pointer; they differ only in the type name checked.
 */
#define TYPE_IVSHMEM_COMMON "ivshmem-common"
#define IVSHMEM_COMMON(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
#define IVSHMEM_PLAIN(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
#define IVSHMEM_DOORBELL(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)
  73
/* Event notifiers received from the server for one peer ID. */
typedef struct Peer {
    int nb_eventfds;            /* number of initialized entries in @eventfds */
    EventNotifier *eventfds;    /* array allocated in resize_peers() */
} Peer;
  78
/* Per-vector bookkeeping for interrupt delivery to this device. */
typedef struct MSIVector {
    PCIDevice *pdev;    /* owning device; NULL while the vector is not set up */
    int virq;           /* value returned by kvm_irqchip_add_msi_route() */
    bool unmasked;      /* set by ivshmem_vector_unmask(), cleared on mask */
} MSIVector;
  84
/* Device state shared by all ivshmem device variants. */
typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;          /* bit mask of IVSHMEM_IOEVENTFD, IVSHMEM_MSI */

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* without interrupts ("memdev" property) */
    CharBackend server_chr; /* with interrupts (connection to the server) */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;                  /* our own peer ID, read through IVPOSITION */

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
} IVShmemState;
 118
/*
 * Registers for the Inter-VM shared memory device.
 * The values are byte offsets into BAR 0.
 */
enum ivshmem_registers {
    INTRMASK = 0,       /* interrupt mask, readable and writable */
    INTRSTATUS = 4,     /* interrupt status, reading it clears it */
    IVPOSITION = 8,     /* reads back this device's peer ID */
    DOORBELL = 12,      /* write (peer ID << 16) | vector to notify a peer */
};
 126
 127static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
 128                                                    unsigned int feature) {
 129    return (ivs->features & (1 << feature));
 130}
 131
 132static inline bool ivshmem_is_master(IVShmemState *s)
 133{
 134    assert(s->master != ON_OFF_AUTO_AUTO);
 135    return s->master == ON_OFF_AUTO_ON;
 136}
 137
 138static void ivshmem_update_irq(IVShmemState *s)
 139{
 140    PCIDevice *d = PCI_DEVICE(s);
 141    uint32_t isr = s->intrstatus & s->intrmask;
 142
 143    /*
 144     * Do nothing unless the device actually uses INTx.  Here's how
 145     * the device variants signal interrupts, what they put in PCI
 146     * config space:
 147     * Device variant    Interrupt  Interrupt Pin  MSI-X cap.
 148     * ivshmem-plain         none            0         no
 149     * ivshmem-doorbell     MSI-X            1        yes(1)
 150     * ivshmem,msi=off       INTx            1         no
 151     * ivshmem,msi=on       MSI-X            1(2)     yes(1)
 152     * (1) if guest enabled MSI-X
 153     * (2) the device lies
 154     * Leads to the condition for doing nothing:
 155     */
 156    if (ivshmem_has_feature(s, IVSHMEM_MSI)
 157        || !d->config[PCI_INTERRUPT_PIN]) {
 158        return;
 159    }
 160
 161    /* don't print ISR resets */
 162    if (isr) {
 163        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
 164                        isr ? 1 : 0, s->intrstatus, s->intrmask);
 165    }
 166
 167    pci_set_irq(d, isr != 0);
 168}
 169
 170static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
 171{
 172    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
 173
 174    s->intrmask = val;
 175    ivshmem_update_irq(s);
 176}
 177
 178static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
 179{
 180    uint32_t ret = s->intrmask;
 181
 182    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
 183    return ret;
 184}
 185
 186static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
 187{
 188    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
 189
 190    s->intrstatus = val;
 191    ivshmem_update_irq(s);
 192}
 193
 194static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
 195{
 196    uint32_t ret = s->intrstatus;
 197
 198    /* reading ISR clears all interrupts */
 199    s->intrstatus = 0;
 200    ivshmem_update_irq(s);
 201    return ret;
 202}
 203
 204static void ivshmem_io_write(void *opaque, hwaddr addr,
 205                             uint64_t val, unsigned size)
 206{
 207    IVShmemState *s = opaque;
 208
 209    uint16_t dest = val >> 16;
 210    uint16_t vector = val & 0xff;
 211
 212    addr &= 0xfc;
 213
 214    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
 215    switch (addr)
 216    {
 217        case INTRMASK:
 218            ivshmem_IntrMask_write(s, val);
 219            break;
 220
 221        case INTRSTATUS:
 222            ivshmem_IntrStatus_write(s, val);
 223            break;
 224
 225        case DOORBELL:
 226            /* check that dest VM ID is reasonable */
 227            if (dest >= s->nb_peers) {
 228                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
 229                break;
 230            }
 231
 232            /* check doorbell range */
 233            if (vector < s->peers[dest].nb_eventfds) {
 234                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
 235                event_notifier_set(&s->peers[dest].eventfds[vector]);
 236            } else {
 237                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
 238                                vector, dest);
 239            }
 240            break;
 241        default:
 242            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
 243    }
 244}
 245
 246static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
 247                                unsigned size)
 248{
 249
 250    IVShmemState *s = opaque;
 251    uint32_t ret;
 252
 253    switch (addr)
 254    {
 255        case INTRMASK:
 256            ret = ivshmem_IntrMask_read(s);
 257            break;
 258
 259        case INTRSTATUS:
 260            ret = ivshmem_IntrStatus_read(s);
 261            break;
 262
 263        case IVPOSITION:
 264            ret = s->vm_id;
 265            break;
 266
 267        default:
 268            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
 269            ret = 0;
 270    }
 271
 272    return ret;
 273}
 274
/*
 * Access callbacks for the register BAR (BAR 0).  The handlers are
 * implemented for 4-byte accesses only.
 */
static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
 284
 285static void ivshmem_vector_notify(void *opaque)
 286{
 287    MSIVector *entry = opaque;
 288    PCIDevice *pdev = entry->pdev;
 289    IVShmemState *s = IVSHMEM_COMMON(pdev);
 290    int vector = entry - s->msi_vectors;
 291    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 292
 293    if (!event_notifier_test_and_clear(n)) {
 294        return;
 295    }
 296
 297    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
 298    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 299        if (msix_enabled(pdev)) {
 300            msix_notify(pdev, vector);
 301        }
 302    } else {
 303        ivshmem_IntrStatus_write(s, 1);
 304    }
 305}
 306
 307static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
 308                                 MSIMessage msg)
 309{
 310    IVShmemState *s = IVSHMEM_COMMON(dev);
 311    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 312    MSIVector *v = &s->msi_vectors[vector];
 313    int ret;
 314
 315    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
 316    if (!v->pdev) {
 317        error_report("ivshmem: vector %d route does not exist", vector);
 318        return -EINVAL;
 319    }
 320    assert(!v->unmasked);
 321
 322    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
 323    if (ret < 0) {
 324        return ret;
 325    }
 326    kvm_irqchip_commit_routes(kvm_state);
 327
 328    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
 329    if (ret < 0) {
 330        return ret;
 331    }
 332    v->unmasked = true;
 333
 334    return 0;
 335}
 336
 337static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
 338{
 339    IVShmemState *s = IVSHMEM_COMMON(dev);
 340    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 341    MSIVector *v = &s->msi_vectors[vector];
 342    int ret;
 343
 344    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
 345    if (!v->pdev) {
 346        error_report("ivshmem: vector %d route does not exist", vector);
 347        return;
 348    }
 349    assert(v->unmasked);
 350
 351    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
 352    if (ret < 0) {
 353        error_report("remove_irqfd_notifier_gsi failed");
 354        return;
 355    }
 356    v->unmasked = false;
 357}
 358
 359static void ivshmem_vector_poll(PCIDevice *dev,
 360                                unsigned int vector_start,
 361                                unsigned int vector_end)
 362{
 363    IVShmemState *s = IVSHMEM_COMMON(dev);
 364    unsigned int vector;
 365
 366    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
 367
 368    vector_end = MIN(vector_end, s->vectors);
 369
 370    for (vector = vector_start; vector < vector_end; vector++) {
 371        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
 372
 373        if (!msix_is_masked(dev, vector)) {
 374            continue;
 375        }
 376
 377        if (event_notifier_test_and_clear(notifier)) {
 378            msix_set_pending(dev, vector);
 379        }
 380    }
 381}
 382
 383static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
 384                                 int vector)
 385{
 386    int eventfd = event_notifier_get_fd(n);
 387
 388    assert(!s->msi_vectors[vector].pdev);
 389    s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 390
 391    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
 392                        NULL, &s->msi_vectors[vector]);
 393}
 394
 395static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
 396{
 397    memory_region_add_eventfd(&s->ivshmem_mmio,
 398                              DOORBELL,
 399                              4,
 400                              true,
 401                              (posn << 16) | i,
 402                              &s->peers[posn].eventfds[i]);
 403}
 404
 405static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
 406{
 407    memory_region_del_eventfd(&s->ivshmem_mmio,
 408                              DOORBELL,
 409                              4,
 410                              true,
 411                              (posn << 16) | i,
 412                              &s->peers[posn].eventfds[i]);
 413}
 414
 415static void close_peer_eventfds(IVShmemState *s, int posn)
 416{
 417    int i, n;
 418
 419    assert(posn >= 0 && posn < s->nb_peers);
 420    n = s->peers[posn].nb_eventfds;
 421
 422    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 423        memory_region_transaction_begin();
 424        for (i = 0; i < n; i++) {
 425            ivshmem_del_eventfd(s, posn, i);
 426        }
 427        memory_region_transaction_commit();
 428    }
 429
 430    for (i = 0; i < n; i++) {
 431        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
 432    }
 433
 434    g_free(s->peers[posn].eventfds);
 435    s->peers[posn].nb_eventfds = 0;
 436}
 437
 438static void resize_peers(IVShmemState *s, int nb_peers)
 439{
 440    int old_nb_peers = s->nb_peers;
 441    int i;
 442
 443    assert(nb_peers > old_nb_peers);
 444    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
 445
 446    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
 447    s->nb_peers = nb_peers;
 448
 449    for (i = old_nb_peers; i < nb_peers; i++) {
 450        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
 451        s->peers[i].nb_eventfds = 0;
 452    }
 453}
 454
 455static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
 456                                     Error **errp)
 457{
 458    PCIDevice *pdev = PCI_DEVICE(s);
 459    int ret;
 460
 461    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
 462    assert(!s->msi_vectors[vector].pdev);
 463
 464    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
 465    if (ret < 0) {
 466        error_setg(errp, "kvm_irqchip_add_msi_route failed");
 467        return;
 468    }
 469
 470    s->msi_vectors[vector].virq = ret;
 471    s->msi_vectors[vector].pdev = pdev;
 472}
 473
 474static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
 475{
 476    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 477    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
 478        ivshmem_has_feature(s, IVSHMEM_MSI);
 479    PCIDevice *pdev = PCI_DEVICE(s);
 480    Error *err = NULL;
 481
 482    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
 483
 484    if (!with_irqfd) {
 485        IVSHMEM_DPRINTF("with eventfd\n");
 486        watch_vector_notifier(s, n, vector);
 487    } else if (msix_enabled(pdev)) {
 488        IVSHMEM_DPRINTF("with irqfd\n");
 489        ivshmem_add_kvm_msi_virq(s, vector, &err);
 490        if (err) {
 491            error_propagate(errp, err);
 492            return;
 493        }
 494
 495        if (!msix_is_masked(pdev, vector)) {
 496            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
 497                                               s->msi_vectors[vector].virq);
 498            /* TODO handle error */
 499        }
 500    } else {
 501        /* it will be delayed until msix is enabled, in write_config */
 502        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
 503    }
 504}
 505
 506static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 507{
 508    Error *local_err = NULL;
 509    struct stat buf;
 510    size_t size;
 511
 512    if (s->ivshmem_bar2) {
 513        error_setg(errp, "server sent unexpected shared memory message");
 514        close(fd);
 515        return;
 516    }
 517
 518    if (fstat(fd, &buf) < 0) {
 519        error_setg_errno(errp, errno,
 520            "can't determine size of shared memory sent by server");
 521        close(fd);
 522        return;
 523    }
 524
 525    size = buf.st_size;
 526
 527    /* mmap the region and map into the BAR2 */
 528    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
 529                                   "ivshmem.bar2", size, true, fd, &local_err);
 530    if (local_err) {
 531        error_propagate(errp, local_err);
 532        return;
 533    }
 534
 535    s->ivshmem_bar2 = &s->server_bar2;
 536}
 537
 538static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
 539                                   Error **errp)
 540{
 541    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
 542    if (posn >= s->nb_peers || posn == s->vm_id) {
 543        error_setg(errp, "invalid peer %d", posn);
 544        return;
 545    }
 546    close_peer_eventfds(s, posn);
 547}
 548
 549static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
 550                                Error **errp)
 551{
 552    Peer *peer = &s->peers[posn];
 553    int vector;
 554
 555    /*
 556     * The N-th connect message for this peer comes with the file
 557     * descriptor for vector N-1.  Count messages to find the vector.
 558     */
 559    if (peer->nb_eventfds >= s->vectors) {
 560        error_setg(errp, "Too many eventfd received, device has %d vectors",
 561                   s->vectors);
 562        close(fd);
 563        return;
 564    }
 565    vector = peer->nb_eventfds++;
 566
 567    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
 568    event_notifier_init_fd(&peer->eventfds[vector], fd);
 569    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
 570
 571    if (posn == s->vm_id) {
 572        setup_interrupt(s, vector, errp);
 573        /* TODO do we need to handle the error? */
 574    }
 575
 576    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 577        ivshmem_add_eventfd(s, posn, vector);
 578    }
 579}
 580
 581static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 582{
 583    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
 584
 585    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
 586        error_setg(errp, "server sent invalid message %" PRId64, msg);
 587        close(fd);
 588        return;
 589    }
 590
 591    if (msg == -1) {
 592        process_msg_shmem(s, fd, errp);
 593        return;
 594    }
 595
 596    if (msg >= s->nb_peers) {
 597        resize_peers(s, msg + 1);
 598    }
 599
 600    if (fd >= 0) {
 601        process_msg_connect(s, msg, fd, errp);
 602    } else {
 603        process_msg_disconnect(s, msg, errp);
 604    }
 605}
 606
 607static int ivshmem_can_receive(void *opaque)
 608{
 609    IVShmemState *s = opaque;
 610
 611    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
 612    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
 613}
 614
 615static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 616{
 617    IVShmemState *s = opaque;
 618    Error *err = NULL;
 619    int fd;
 620    int64_t msg;
 621
 622    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
 623    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
 624    s->msg_buffered_bytes += size;
 625    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
 626        return;
 627    }
 628    msg = le64_to_cpu(s->msg_buf);
 629    s->msg_buffered_bytes = 0;
 630
 631    fd = qemu_chr_fe_get_msgfd(&s->server_chr);
 632
 633    process_msg(s, msg, fd, &err);
 634    if (err) {
 635        error_report_err(err);
 636    }
 637}
 638
 639static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
 640{
 641    int64_t msg;
 642    int n, ret;
 643
 644    n = 0;
 645    do {
 646        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
 647                                   sizeof(msg) - n);
 648        if (ret < 0) {
 649            if (ret == -EINTR) {
 650                continue;
 651            }
 652            error_setg_errno(errp, -ret, "read from server failed");
 653            return INT64_MIN;
 654        }
 655        n += ret;
 656    } while (n < sizeof(msg));
 657
 658    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
 659    return le64_to_cpu(msg);
 660}
 661
 662static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
 663{
 664    Error *err = NULL;
 665    int64_t msg;
 666    int fd;
 667
 668    msg = ivshmem_recv_msg(s, &fd, &err);
 669    if (err) {
 670        error_propagate(errp, err);
 671        return;
 672    }
 673    if (msg != IVSHMEM_PROTOCOL_VERSION) {
 674        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
 675                   msg, IVSHMEM_PROTOCOL_VERSION);
 676        return;
 677    }
 678    if (fd != -1) {
 679        error_setg(errp, "server sent invalid version message");
 680        return;
 681    }
 682
 683    /*
 684     * ivshmem-server sends the remaining initial messages in a fixed
 685     * order, but the device has always accepted them in any order.
 686     * Stay as compatible as practical, just in case people use
 687     * servers that behave differently.
 688     */
 689
 690    /*
 691     * ivshmem_device_spec.txt has always required the ID message
 692     * right here, and ivshmem-server has always complied.  However,
 693     * older versions of the device accepted it out of order, but
 694     * broke when an interrupt setup message arrived before it.
 695     */
 696    msg = ivshmem_recv_msg(s, &fd, &err);
 697    if (err) {
 698        error_propagate(errp, err);
 699        return;
 700    }
 701    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
 702        error_setg(errp, "server sent invalid ID message");
 703        return;
 704    }
 705    s->vm_id = msg;
 706
 707    /*
 708     * Receive more messages until we got shared memory.
 709     */
 710    do {
 711        msg = ivshmem_recv_msg(s, &fd, &err);
 712        if (err) {
 713            error_propagate(errp, err);
 714            return;
 715        }
 716        process_msg(s, msg, fd, &err);
 717        if (err) {
 718            error_propagate(errp, err);
 719            return;
 720        }
 721    } while (msg != -1);
 722
 723    /*
 724     * This function must either map the shared memory or fail.  The
 725     * loop above ensures that: it terminates normally only after it
 726     * successfully processed the server's shared memory message.
 727     * Assert that actually mapped the shared memory:
 728     */
 729    assert(s->ivshmem_bar2);
 730}
 731
 732/* Select the MSI-X vectors used by device.
 733 * ivshmem maps events to vectors statically, so
 734 * we just enable all vectors on init and after reset. */
 735static void ivshmem_msix_vector_use(IVShmemState *s)
 736{
 737    PCIDevice *d = PCI_DEVICE(s);
 738    int i;
 739
 740    for (i = 0; i < s->vectors; i++) {
 741        msix_vector_use(d, i);
 742    }
 743}
 744
 745static void ivshmem_disable_irqfd(IVShmemState *s);
 746
 747static void ivshmem_reset(DeviceState *d)
 748{
 749    IVShmemState *s = IVSHMEM_COMMON(d);
 750
 751    ivshmem_disable_irqfd(s);
 752
 753    s->intrstatus = 0;
 754    s->intrmask = 0;
 755    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 756        ivshmem_msix_vector_use(s);
 757    }
 758}
 759
 760static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
 761{
 762    /* allocate QEMU callback data for receiving interrupts */
 763    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 764
 765    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 766        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
 767            return -1;
 768        }
 769
 770        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 771        ivshmem_msix_vector_use(s);
 772    }
 773
 774    return 0;
 775}
 776
 777static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
 778{
 779    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
 780
 781    if (s->msi_vectors[vector].pdev == NULL) {
 782        return;
 783    }
 784
 785    /* it was cleaned when masked in the frontend. */
 786    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
 787
 788    s->msi_vectors[vector].pdev = NULL;
 789}
 790
 791static void ivshmem_enable_irqfd(IVShmemState *s)
 792{
 793    PCIDevice *pdev = PCI_DEVICE(s);
 794    int i;
 795
 796    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 797        Error *err = NULL;
 798
 799        ivshmem_add_kvm_msi_virq(s, i, &err);
 800        if (err) {
 801            error_report_err(err);
 802            goto undo;
 803        }
 804    }
 805
 806    if (msix_set_vector_notifiers(pdev,
 807                                  ivshmem_vector_unmask,
 808                                  ivshmem_vector_mask,
 809                                  ivshmem_vector_poll)) {
 810        error_report("ivshmem: msix_set_vector_notifiers failed");
 811        goto undo;
 812    }
 813    return;
 814
 815undo:
 816    while (--i >= 0) {
 817        ivshmem_remove_kvm_msi_virq(s, i);
 818    }
 819}
 820
 821static void ivshmem_disable_irqfd(IVShmemState *s)
 822{
 823    PCIDevice *pdev = PCI_DEVICE(s);
 824    int i;
 825
 826    if (!pdev->msix_vector_use_notifier) {
 827        return;
 828    }
 829
 830    msix_unset_vector_notifiers(pdev);
 831
 832    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 833        /*
 834         * MSI-X is already disabled here so msix_unset_vector_notifiers()
 835         * didn't call our release notifier.  Do it now to keep our masks and
 836         * unmasks balanced.
 837         */
 838        if (s->msi_vectors[i].unmasked) {
 839            ivshmem_vector_mask(pdev, i);
 840        }
 841        ivshmem_remove_kvm_msi_virq(s, i);
 842    }
 843
 844}
 845
 846static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
 847                                 uint32_t val, int len)
 848{
 849    IVShmemState *s = IVSHMEM_COMMON(pdev);
 850    int is_enabled, was_enabled = msix_enabled(pdev);
 851
 852    pci_default_write_config(pdev, address, val, len);
 853    is_enabled = msix_enabled(pdev);
 854
 855    if (kvm_msi_via_irqfd_enabled()) {
 856        if (!was_enabled && is_enabled) {
 857            ivshmem_enable_irqfd(s);
 858        } else if (was_enabled && !is_enabled) {
 859            ivshmem_disable_irqfd(s);
 860        }
 861    }
 862}
 863
 864static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
 865{
 866    IVShmemState *s = IVSHMEM_COMMON(dev);
 867    Error *err = NULL;
 868    uint8_t *pci_conf;
 869    Error *local_err = NULL;
 870
 871    /* IRQFD requires MSI */
 872    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
 873        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
 874        error_setg(errp, "ioeventfd/irqfd requires MSI");
 875        return;
 876    }
 877
 878    pci_conf = dev->config;
 879    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
 880
 881    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
 882                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
 883
 884    /* region for registers*/
 885    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
 886                     &s->ivshmem_mmio);
 887
 888    if (s->hostmem != NULL) {
 889        IVSHMEM_DPRINTF("using hostmem\n");
 890
 891        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
 892        host_memory_backend_set_mapped(s->hostmem, true);
 893    } else {
 894        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
 895        assert(chr);
 896
 897        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
 898                        chr->filename);
 899
 900        /* we allocate enough space for 16 peers and grow as needed */
 901        resize_peers(s, 16);
 902
 903        /*
 904         * Receive setup messages from server synchronously.
 905         * Older versions did it asynchronously, but that creates a
 906         * number of entertaining race conditions.
 907         */
 908        ivshmem_recv_setup(s, &err);
 909        if (err) {
 910            error_propagate(errp, err);
 911            return;
 912        }
 913
 914        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
 915            error_setg(errp,
 916                       "master must connect to the server before any peers");
 917            return;
 918        }
 919
 920        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
 921                                 ivshmem_read, NULL, NULL, s, NULL, true);
 922
 923        if (ivshmem_setup_interrupts(s, errp) < 0) {
 924            error_prepend(errp, "Failed to initialize interrupts: ");
 925            return;
 926        }
 927    }
 928
 929    if (s->master == ON_OFF_AUTO_AUTO) {
 930        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 931    }
 932
 933    if (!ivshmem_is_master(s)) {
 934        error_setg(&s->migration_blocker,
 935                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
 936        migrate_add_blocker(s->migration_blocker, &local_err);
 937        if (local_err) {
 938            error_propagate(errp, local_err);
 939            error_free(s->migration_blocker);
 940            return;
 941        }
 942    }
 943
 944    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
 945    pci_register_bar(PCI_DEVICE(s), 2,
 946                     PCI_BASE_ADDRESS_SPACE_MEMORY |
 947                     PCI_BASE_ADDRESS_MEM_PREFETCH |
 948                     PCI_BASE_ADDRESS_MEM_TYPE_64,
 949                     s->ivshmem_bar2);
 950}
 951
 952static void ivshmem_exit(PCIDevice *dev)
 953{
 954    IVShmemState *s = IVSHMEM_COMMON(dev);
 955    int i;
 956
 957    if (s->migration_blocker) {
 958        migrate_del_blocker(s->migration_blocker);
 959        error_free(s->migration_blocker);
 960    }
 961
 962    if (memory_region_is_mapped(s->ivshmem_bar2)) {
 963        if (!s->hostmem) {
 964            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
 965            int fd;
 966
 967            if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
 968                error_report("Failed to munmap shared memory %s",
 969                             strerror(errno));
 970            }
 971
 972            fd = memory_region_get_fd(s->ivshmem_bar2);
 973            close(fd);
 974        }
 975
 976        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
 977    }
 978
 979    if (s->hostmem) {
 980        host_memory_backend_set_mapped(s->hostmem, false);
 981    }
 982
 983    if (s->peers) {
 984        for (i = 0; i < s->nb_peers; i++) {
 985            close_peer_eventfds(s, i);
 986        }
 987        g_free(s->peers);
 988    }
 989
 990    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 991        msix_uninit_exclusive_bar(dev);
 992    }
 993
 994    g_free(s->msi_vectors);
 995}
 996
 997static int ivshmem_pre_load(void *opaque)
 998{
 999    IVShmemState *s = opaque;
1000
1001    if (!ivshmem_is_master(s)) {
1002        error_report("'peer' devices are not migratable");
1003        return -EINVAL;
1004    }
1005
1006    return 0;
1007}
1008
1009static int ivshmem_post_load(void *opaque, int version_id)
1010{
1011    IVShmemState *s = opaque;
1012
1013    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
1014        ivshmem_msix_vector_use(s);
1015    }
1016    return 0;
1017}
1018
1019static void ivshmem_common_class_init(ObjectClass *klass, void *data)
1020{
1021    DeviceClass *dc = DEVICE_CLASS(klass);
1022    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1023
1024    k->realize = ivshmem_common_realize;
1025    k->exit = ivshmem_exit;
1026    k->config_write = ivshmem_write_config;
1027    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
1028    k->device_id = PCI_DEVICE_ID_IVSHMEM;
1029    k->class_id = PCI_CLASS_MEMORY_RAM;
1030    k->revision = 1;
1031    dc->reset = ivshmem_reset;
1032    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1033    dc->desc = "Inter-VM shared memory";
1034}
1035
1036static const TypeInfo ivshmem_common_info = {
1037    .name          = TYPE_IVSHMEM_COMMON,
1038    .parent        = TYPE_PCI_DEVICE,
1039    .instance_size = sizeof(IVShmemState),
1040    .abstract      = true,
1041    .class_init    = ivshmem_common_class_init,
1042    .interfaces = (InterfaceInfo[]) {
1043        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1044        { },
1045    },
1046};
1047
1048static const VMStateDescription ivshmem_plain_vmsd = {
1049    .name = TYPE_IVSHMEM_PLAIN,
1050    .version_id = 0,
1051    .minimum_version_id = 0,
1052    .pre_load = ivshmem_pre_load,
1053    .post_load = ivshmem_post_load,
1054    .fields = (VMStateField[]) {
1055        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1056        VMSTATE_UINT32(intrstatus, IVShmemState),
1057        VMSTATE_UINT32(intrmask, IVShmemState),
1058        VMSTATE_END_OF_LIST()
1059    },
1060};
1061
1062static Property ivshmem_plain_properties[] = {
1063    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1064    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
1065                     HostMemoryBackend *),
1066    DEFINE_PROP_END_OF_LIST(),
1067};
1068
1069static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
1070{
1071    IVShmemState *s = IVSHMEM_COMMON(dev);
1072
1073    if (!s->hostmem) {
1074        error_setg(errp, "You must specify a 'memdev'");
1075        return;
1076    } else if (host_memory_backend_is_mapped(s->hostmem)) {
1077        char *path = object_get_canonical_path_component(OBJECT(s->hostmem));
1078        error_setg(errp, "can't use already busy memdev: %s", path);
1079        g_free(path);
1080        return;
1081    }
1082
1083    ivshmem_common_realize(dev, errp);
1084}
1085
1086static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1087{
1088    DeviceClass *dc = DEVICE_CLASS(klass);
1089    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1090
1091    k->realize = ivshmem_plain_realize;
1092    dc->props = ivshmem_plain_properties;
1093    dc->vmsd = &ivshmem_plain_vmsd;
1094}
1095
1096static const TypeInfo ivshmem_plain_info = {
1097    .name          = TYPE_IVSHMEM_PLAIN,
1098    .parent        = TYPE_IVSHMEM_COMMON,
1099    .instance_size = sizeof(IVShmemState),
1100    .class_init    = ivshmem_plain_class_init,
1101};
1102
1103static const VMStateDescription ivshmem_doorbell_vmsd = {
1104    .name = TYPE_IVSHMEM_DOORBELL,
1105    .version_id = 0,
1106    .minimum_version_id = 0,
1107    .pre_load = ivshmem_pre_load,
1108    .post_load = ivshmem_post_load,
1109    .fields = (VMStateField[]) {
1110        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1111        VMSTATE_MSIX(parent_obj, IVShmemState),
1112        VMSTATE_UINT32(intrstatus, IVShmemState),
1113        VMSTATE_UINT32(intrmask, IVShmemState),
1114        VMSTATE_END_OF_LIST()
1115    },
1116};
1117
1118static Property ivshmem_doorbell_properties[] = {
1119    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
1120    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
1121    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
1122                    true),
1123    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1124    DEFINE_PROP_END_OF_LIST(),
1125};
1126
1127static void ivshmem_doorbell_init(Object *obj)
1128{
1129    IVShmemState *s = IVSHMEM_DOORBELL(obj);
1130
1131    s->features |= (1 << IVSHMEM_MSI);
1132}
1133
1134static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
1135{
1136    IVShmemState *s = IVSHMEM_COMMON(dev);
1137
1138    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
1139        error_setg(errp, "You must specify a 'chardev'");
1140        return;
1141    }
1142
1143    ivshmem_common_realize(dev, errp);
1144}
1145
1146static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1147{
1148    DeviceClass *dc = DEVICE_CLASS(klass);
1149    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1150
1151    k->realize = ivshmem_doorbell_realize;
1152    dc->props = ivshmem_doorbell_properties;
1153    dc->vmsd = &ivshmem_doorbell_vmsd;
1154}
1155
1156static const TypeInfo ivshmem_doorbell_info = {
1157    .name          = TYPE_IVSHMEM_DOORBELL,
1158    .parent        = TYPE_IVSHMEM_COMMON,
1159    .instance_size = sizeof(IVShmemState),
1160    .instance_init = ivshmem_doorbell_init,
1161    .class_init    = ivshmem_doorbell_class_init,
1162};
1163
1164static void ivshmem_register_types(void)
1165{
1166    type_register_static(&ivshmem_common_info);
1167    type_register_static(&ivshmem_plain_info);
1168    type_register_static(&ivshmem_doorbell_info);
1169}
1170
1171type_init(ivshmem_register_types)
1172