/* qemu/hw/misc/ivshmem.c */
   1/*
   2 * Inter-VM Shared Memory PCI device.
   3 *
   4 * Author:
   5 *      Cam Macdonell <cam@cs.ualberta.ca>
   6 *
   7 * Based On: cirrus_vga.c
   8 *          Copyright (c) 2004 Fabrice Bellard
   9 *          Copyright (c) 2004 Makoto Suzuki (suzu)
  10 *
  11 *      and rtl8139.c
  12 *          Copyright (c) 2006 Igor Kovalenko
  13 *
  14 * This code is licensed under the GNU GPL v2.
  15 *
  16 * Contributions after 2012-01-13 are licensed under the terms of the
  17 * GNU GPL, version 2 or (at your option) any later version.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/units.h"
  22#include "qapi/error.h"
  23#include "qemu/cutils.h"
  24#include "hw/pci/pci.h"
  25#include "hw/qdev-properties.h"
  26#include "hw/pci/msi.h"
  27#include "hw/pci/msix.h"
  28#include "sysemu/kvm.h"
  29#include "migration/blocker.h"
  30#include "migration/vmstate.h"
  31#include "qemu/error-report.h"
  32#include "qemu/event_notifier.h"
  33#include "qemu/module.h"
  34#include "qom/object_interfaces.h"
  35#include "chardev/char-fe.h"
  36#include "sysemu/hostmem.h"
  37#include "sysemu/qtest.h"
  38#include "qapi/visitor.h"
  39
  40#include "hw/misc/ivshmem.h"
  41
  42#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
  43#define PCI_DEVICE_ID_IVSHMEM   0x1110
  44
  45#define IVSHMEM_MAX_PEERS UINT16_MAX
  46#define IVSHMEM_IOEVENTFD   0
  47#define IVSHMEM_MSI     1
  48
  49#define IVSHMEM_REG_BAR_SIZE 0x100
  50
  51#define IVSHMEM_DEBUG 0
  52#define IVSHMEM_DPRINTF(fmt, ...)                       \
  53    do {                                                \
  54        if (IVSHMEM_DEBUG) {                            \
  55            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
  56        }                                               \
  57    } while (0)
  58
  59#define TYPE_IVSHMEM_COMMON "ivshmem-common"
  60#define IVSHMEM_COMMON(obj) \
  61    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)
  62
  63#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
  64#define IVSHMEM_PLAIN(obj) \
  65    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)
  66
  67#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
  68#define IVSHMEM_DOORBELL(obj) \
  69    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)
  70
  71#define TYPE_IVSHMEM "ivshmem"
  72#define IVSHMEM(obj) \
  73    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)
  74
/* Per-peer interrupt state: one eventfd per interrupt vector. */
typedef struct Peer {
    int nb_eventfds;          /* number of valid entries in @eventfds */
    EventNotifier *eventfds;  /* array sized to the device's vector count */
} Peer;
  79
/* Per-vector bookkeeping for MSI/KVM-irqfd delivery. */
typedef struct MSIVector {
    PCIDevice *pdev;   /* owning device; NULL while the vector is unclaimed */
    int virq;          /* KVM MSI routing entry, valid when pdev != NULL */
    bool unmasked;     /* true while the irqfd is attached (vector unmasked) */
} MSIVector;
  85
/*
 * Device state shared by the ivshmem-plain and ivshmem-doorbell flavours.
 * Exactly one of @hostmem (plain) or @server_chr (doorbell) is configured.
 */
typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;          /* bitmask of IVSHMEM_* feature bits */

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* with interrupts */
    CharBackend server_chr; /* without interrupts */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;                  /* our peer ID, assigned by the server */

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;           /* resolved to ON or OFF during realize */
    Error *migration_blocker;
} IVShmemState;
 119
/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,    /* interrupt mask */
    INTRSTATUS = 4,  /* interrupt status; cleared on read */
    IVPOSITION = 8,  /* this VM's peer ID (read-only) */
    DOORBELL = 12,   /* write (peer << 16) | vector to notify a peer */
};
 127
 128static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
 129                                                    unsigned int feature) {
 130    return (ivs->features & (1 << feature));
 131}
 132
 133static inline bool ivshmem_is_master(IVShmemState *s)
 134{
 135    assert(s->master != ON_OFF_AUTO_AUTO);
 136    return s->master == ON_OFF_AUTO_ON;
 137}
 138
 139static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
 140{
 141    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
 142
 143    s->intrmask = val;
 144}
 145
 146static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
 147{
 148    uint32_t ret = s->intrmask;
 149
 150    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
 151    return ret;
 152}
 153
 154static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
 155{
 156    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
 157
 158    s->intrstatus = val;
 159}
 160
 161static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
 162{
 163    uint32_t ret = s->intrstatus;
 164
 165    /* reading ISR clears all interrupts */
 166    s->intrstatus = 0;
 167    return ret;
 168}
 169
 170static void ivshmem_io_write(void *opaque, hwaddr addr,
 171                             uint64_t val, unsigned size)
 172{
 173    IVShmemState *s = opaque;
 174
 175    uint16_t dest = val >> 16;
 176    uint16_t vector = val & 0xff;
 177
 178    addr &= 0xfc;
 179
 180    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
 181    switch (addr)
 182    {
 183        case INTRMASK:
 184            ivshmem_IntrMask_write(s, val);
 185            break;
 186
 187        case INTRSTATUS:
 188            ivshmem_IntrStatus_write(s, val);
 189            break;
 190
 191        case DOORBELL:
 192            /* check that dest VM ID is reasonable */
 193            if (dest >= s->nb_peers) {
 194                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
 195                break;
 196            }
 197
 198            /* check doorbell range */
 199            if (vector < s->peers[dest].nb_eventfds) {
 200                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
 201                event_notifier_set(&s->peers[dest].eventfds[vector]);
 202            } else {
 203                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
 204                                vector, dest);
 205            }
 206            break;
 207        default:
 208            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
 209    }
 210}
 211
 212static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
 213                                unsigned size)
 214{
 215
 216    IVShmemState *s = opaque;
 217    uint32_t ret;
 218
 219    switch (addr)
 220    {
 221        case INTRMASK:
 222            ret = ivshmem_IntrMask_read(s);
 223            break;
 224
 225        case INTRSTATUS:
 226            ret = ivshmem_IntrStatus_read(s);
 227            break;
 228
 229        case IVPOSITION:
 230            ret = s->vm_id;
 231            break;
 232
 233        default:
 234            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
 235            ret = 0;
 236    }
 237
 238    return ret;
 239}
 240
/* BAR 0 register accessors: 32-bit accesses only, guest-native endian. */
static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
 250
/*
 * fd handler for our own per-vector eventfd (non-irqfd path).  Consumes
 * the pending event and forwards it to the guest, either as an MSI-X
 * message or, without MSI, by setting the interrupt status register.
 */
static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    /* recover the vector number from the entry's position in the array */
    int vector = entry - s->msi_vectors;
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    /* nothing pending: spurious wakeup, deliver nothing */
    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        /* only inject while the guest has MSI-X enabled */
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        /* legacy interrupt: latch status; guest reads INTRSTATUS to clear */
        ivshmem_IntrStatus_write(s, 1);
    }
}
 272
/*
 * MSI-X unmask notifier (irqfd path): refresh the KVM MSI route with the
 * possibly-updated message, then re-attach the vector's eventfd to the
 * route so the kernel injects interrupts directly.
 * Returns 0 on success, negative errno on failure.
 */
static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return -EINVAL;
    }
    /* mask/unmask calls must alternate; see ivshmem_vector_mask() */
    assert(!v->unmasked);

    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }
    /* route update takes effect only after a commit */
    kvm_irqchip_commit_routes(kvm_state);

    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
    if (ret < 0) {
        return ret;
    }
    v->unmasked = true;

    return 0;
}
 302
/*
 * MSI-X mask notifier (irqfd path): detach the vector's eventfd from its
 * KVM route so events are no longer injected while the vector is masked.
 */
static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return;
    }
    /* must currently be unmasked; see ivshmem_vector_unmask() */
    assert(v->unmasked);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
    if (ret < 0) {
        error_report("remove_irqfd_notifier_gsi failed");
        return;
    }
    v->unmasked = false;
}
 324
/*
 * MSI-X poll notifier: for each masked vector in [vector_start,
 * vector_end), consume any event that fired while masked and record the
 * interrupt as pending so it is delivered when the vector is unmasked.
 */
static void ivshmem_vector_poll(PCIDevice *dev,
                                unsigned int vector_start,
                                unsigned int vector_end)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    unsigned int vector;

    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);

    /* never look past the vectors this device actually has */
    vector_end = MIN(vector_end, s->vectors);

    for (vector = vector_start; vector < vector_end; vector++) {
        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];

        /* unmasked vectors are delivered through the irqfd, not here */
        if (!msix_is_masked(dev, vector)) {
            continue;
        }

        if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}
 348
 349static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
 350                                 int vector)
 351{
 352    int eventfd = event_notifier_get_fd(n);
 353
 354    assert(!s->msi_vectors[vector].pdev);
 355    s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 356
 357    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
 358                        NULL, &s->msi_vectors[vector]);
 359}
 360
 361static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
 362{
 363    memory_region_add_eventfd(&s->ivshmem_mmio,
 364                              DOORBELL,
 365                              4,
 366                              true,
 367                              (posn << 16) | i,
 368                              &s->peers[posn].eventfds[i]);
 369}
 370
 371static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
 372{
 373    memory_region_del_eventfd(&s->ivshmem_mmio,
 374                              DOORBELL,
 375                              4,
 376                              true,
 377                              (posn << 16) | i,
 378                              &s->peers[posn].eventfds[i]);
 379}
 380
 381static void close_peer_eventfds(IVShmemState *s, int posn)
 382{
 383    int i, n;
 384
 385    assert(posn >= 0 && posn < s->nb_peers);
 386    n = s->peers[posn].nb_eventfds;
 387
 388    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 389        memory_region_transaction_begin();
 390        for (i = 0; i < n; i++) {
 391            ivshmem_del_eventfd(s, posn, i);
 392        }
 393        memory_region_transaction_commit();
 394    }
 395
 396    for (i = 0; i < n; i++) {
 397        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
 398    }
 399
 400    g_free(s->peers[posn].eventfds);
 401    s->peers[posn].nb_eventfds = 0;
 402}
 403
 404static void resize_peers(IVShmemState *s, int nb_peers)
 405{
 406    int old_nb_peers = s->nb_peers;
 407    int i;
 408
 409    assert(nb_peers > old_nb_peers);
 410    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
 411
 412    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
 413    s->nb_peers = nb_peers;
 414
 415    for (i = old_nb_peers; i < nb_peers; i++) {
 416        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
 417        s->peers[i].nb_eventfds = 0;
 418    }
 419}
 420
/*
 * Allocate a KVM MSI routing entry (virq) for @vector and claim the
 * vector for this device.  On failure sets @errp and leaves the vector
 * unclaimed (pdev stays NULL).
 */
static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    /* a non-negative return is the allocated virq number */
    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}
 439
/*
 * Hook up delivery for our own @vector, choosing between the userspace
 * eventfd-handler path and the KVM irqfd path depending on capability
 * and the guest's current MSI-X state.
 */
static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    /* irqfd needs both KVM support and the device's MSI feature */
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        /* userspace path: poll the eventfd from the main loop */
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}
 471
/*
 * Handle the server's shared-memory message: @fd is the memory's file
 * descriptor.  Size is taken from fstat(), the fd is mapped as RAM, and
 * the region becomes BAR 2.  @fd ownership passes to the memory region
 * on success; on failure it is closed (or left to the mapping helper).
 */
static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    Error *local_err = NULL;
    struct stat buf;
    size_t size;

    /* the server must send the shared memory exactly once */
    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
            "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* mmap the region and map into the BAR2 */
    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
                                   "ivshmem.bar2", size, true, fd, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    s->ivshmem_bar2 = &s->server_bar2;
}
 503
 504static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
 505                                   Error **errp)
 506{
 507    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
 508    if (posn >= s->nb_peers || posn == s->vm_id) {
 509        error_setg(errp, "invalid peer %d", posn);
 510        return;
 511    }
 512    close_peer_eventfds(s, posn);
 513}
 514
/*
 * Handle an interrupt-setup message: @fd is an eventfd for peer @posn.
 * The vector number is implicit in the message ordering (see below).
 * When the peer is ourselves, also hook up local delivery; with the
 * ioeventfd feature, bind the fd to doorbell writes as well.
 */
static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1.  Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfd received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        /* our own vector: wire up the delivery path */
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}
 546
 547static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 548{
 549    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
 550
 551    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
 552        error_setg(errp, "server sent invalid message %" PRId64, msg);
 553        close(fd);
 554        return;
 555    }
 556
 557    if (msg == -1) {
 558        process_msg_shmem(s, fd, errp);
 559        return;
 560    }
 561
 562    if (msg >= s->nb_peers) {
 563        resize_peers(s, msg + 1);
 564    }
 565
 566    if (fd >= 0) {
 567        process_msg_connect(s, msg, fd, errp);
 568    } else {
 569        process_msg_disconnect(s, msg, errp);
 570    }
 571}
 572
 573static int ivshmem_can_receive(void *opaque)
 574{
 575    IVShmemState *s = opaque;
 576
 577    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
 578    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
 579}
 580
/*
 * Chardev receive callback: accumulate bytes until a full 8-byte
 * little-endian message is buffered, then grab its companion fd (if any)
 * and dispatch through process_msg().  Errors are reported, not fatal.
 */
static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    /* ivshmem_can_receive() guarantees we never overflow msg_buf */
    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        /* message still incomplete; wait for more bytes */
        return;
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    /* fetch the fd passed alongside the message, -1 if none */
    fd = qemu_chr_fe_get_msgfd(&s->server_chr);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}
 604
/*
 * Synchronously read one 8-byte little-endian message from the server,
 * retrying on EINTR.  Stores the accompanying fd (or -1) in *@pfd.
 * Returns the message value, or INT64_MIN with @errp set on read error.
 */
static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        /* keep reading until all sizeof(msg) bytes have arrived */
        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0) {
            if (ret == -EINTR) {
                continue;
            }
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
    return le64_to_cpu(msg);
}
 627
/*
 * Perform the synchronous setup handshake with the ivshmem server:
 * protocol version check, our peer ID, then messages until the shared
 * memory arrives.  On success s->vm_id and s->ivshmem_bar2 are set;
 * on any protocol or I/O error, @errp is set and setup is aborted.
 */
static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    /* first message must be the protocol version, without an fd */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied.  However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we got shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail.  The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}
 697
 698/* Select the MSI-X vectors used by device.
 699 * ivshmem maps events to vectors statically, so
 700 * we just enable all vectors on init and after reset. */
 701static void ivshmem_msix_vector_use(IVShmemState *s)
 702{
 703    PCIDevice *d = PCI_DEVICE(s);
 704    int i;
 705
 706    for (i = 0; i < s->vectors; i++) {
 707        msix_vector_use(d, i);
 708    }
 709}
 710
 711static void ivshmem_disable_irqfd(IVShmemState *s);
 712
 713static void ivshmem_reset(DeviceState *d)
 714{
 715    IVShmemState *s = IVSHMEM_COMMON(d);
 716
 717    ivshmem_disable_irqfd(s);
 718
 719    s->intrstatus = 0;
 720    s->intrmask = 0;
 721    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 722        ivshmem_msix_vector_use(s);
 723    }
 724}
 725
 726static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
 727{
 728    /* allocate QEMU callback data for receiving interrupts */
 729    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 730
 731    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 732        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
 733            return -1;
 734        }
 735
 736        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 737        ivshmem_msix_vector_use(s);
 738    }
 739
 740    return 0;
 741}
 742
 743static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
 744{
 745    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
 746
 747    if (s->msi_vectors[vector].pdev == NULL) {
 748        return;
 749    }
 750
 751    /* it was cleaned when masked in the frontend. */
 752    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
 753
 754    s->msi_vectors[vector].pdev = NULL;
 755}
 756
/*
 * Switch to irqfd delivery: allocate a KVM MSI route for every connected
 * vector, then install mask/unmask/poll notifiers so the routes follow
 * the guest's MSI-X mask state.  On failure, roll back the routes
 * allocated so far (best effort).
 */
static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            goto undo;
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
        goto undo;
    }
    return;

undo:
    /* release the virqs allocated before the failure (indices 0..i-1) */
    while (--i >= 0) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}
 786
/*
 * Tear down irqfd delivery: remove the MSI-X notifiers and release every
 * vector's KVM route.  Safe to call when irqfd was never enabled (the
 * notifier check makes it a no-op).
 */
static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    /* nothing to do unless ivshmem_enable_irqfd() installed notifiers */
    if (!pdev->msix_vector_use_notifier) {
        return;
    }

    msix_unset_vector_notifiers(pdev);

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        /*
         * MSI-X is already disabled here so msix_unset_vector_notifiers()
         * didn't call our release notifier.  Do it now to keep our masks and
         * unmasks balanced.
         */
        if (s->msi_vectors[i].unmasked) {
            ivshmem_vector_mask(pdev, i);
        }
        ivshmem_remove_kvm_msi_virq(s, i);
    }

}
 811
 812static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
 813                                 uint32_t val, int len)
 814{
 815    IVShmemState *s = IVSHMEM_COMMON(pdev);
 816    int is_enabled, was_enabled = msix_enabled(pdev);
 817
 818    pci_default_write_config(pdev, address, val, len);
 819    is_enabled = msix_enabled(pdev);
 820
 821    if (kvm_msi_via_irqfd_enabled()) {
 822        if (!was_enabled && is_enabled) {
 823            ivshmem_enable_irqfd(s);
 824        } else if (was_enabled && !is_enabled) {
 825            ivshmem_disable_irqfd(s);
 826        }
 827    }
 828}
 829
/*
 * Realize shared by both flavours.  Sets up the register BAR, then maps
 * shared memory either from the memdev link (plain) or via the server
 * handshake (doorbell), resolves the 'master' property, installs a
 * migration blocker for peers, and finally registers BAR 2.
 */
static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers*/
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->hostmem != NULL) {
        /* plain flavour: BAR 2 comes straight from the memory backend */
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
        host_memory_backend_set_mapped(s->hostmem, true);
    } else {
        /* doorbell flavour: talk to the ivshmem server over the chardev */
        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
        assert(chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from server synchronously.
         * Older versions did it asynchronously, but that creates a
         * number of entertaining race conditions.
         */
        ivshmem_recv_setup(s, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
            error_setg(errp,
                       "master must connect to the server before any peers");
            return;
        }

        /* from here on, further server messages arrive asynchronously */
        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
                                 ivshmem_read, NULL, NULL, s, NULL, true);

        if (ivshmem_setup_interrupts(s, errp) < 0) {
            error_prepend(errp, "Failed to initialize interrupts: ");
            return;
        }
    }

    /* AUTO resolves to master for peer 0, peer mode for everyone else */
    if (s->master == ON_OFF_AUTO_AUTO) {
        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!ivshmem_is_master(s)) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
        if (migrate_add_blocker(s->migration_blocker, errp) < 0) {
            error_free(s->migration_blocker);
            return;
        }
    }

    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
    pci_register_bar(PCI_DEVICE(s), 2,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_PREFETCH |
                     PCI_BASE_ADDRESS_MEM_TYPE_64,
                     s->ivshmem_bar2);
}
 914
 915static void ivshmem_exit(PCIDevice *dev)
 916{
 917    IVShmemState *s = IVSHMEM_COMMON(dev);
 918    int i;
 919
 920    if (s->migration_blocker) {
 921        migrate_del_blocker(s->migration_blocker);
 922        error_free(s->migration_blocker);
 923    }
 924
 925    if (memory_region_is_mapped(s->ivshmem_bar2)) {
 926        if (!s->hostmem) {
 927            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
 928            int fd;
 929
 930            if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
 931                error_report("Failed to munmap shared memory %s",
 932                             strerror(errno));
 933            }
 934
 935            fd = memory_region_get_fd(s->ivshmem_bar2);
 936            close(fd);
 937        }
 938
 939        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
 940    }
 941
 942    if (s->hostmem) {
 943        host_memory_backend_set_mapped(s->hostmem, false);
 944    }
 945
 946    if (s->peers) {
 947        for (i = 0; i < s->nb_peers; i++) {
 948            close_peer_eventfds(s, i);
 949        }
 950        g_free(s->peers);
 951    }
 952
 953    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 954        msix_uninit_exclusive_bar(dev);
 955    }
 956
 957    g_free(s->msi_vectors);
 958}
 959
 960static int ivshmem_pre_load(void *opaque)
 961{
 962    IVShmemState *s = opaque;
 963
 964    if (!ivshmem_is_master(s)) {
 965        error_report("'peer' devices are not migratable");
 966        return -EINVAL;
 967    }
 968
 969    return 0;
 970}
 971
 972static int ivshmem_post_load(void *opaque, int version_id)
 973{
 974    IVShmemState *s = opaque;
 975
 976    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 977        ivshmem_msix_vector_use(s);
 978    }
 979    return 0;
 980}
 981
 982static void ivshmem_common_class_init(ObjectClass *klass, void *data)
 983{
 984    DeviceClass *dc = DEVICE_CLASS(klass);
 985    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 986
 987    k->realize = ivshmem_common_realize;
 988    k->exit = ivshmem_exit;
 989    k->config_write = ivshmem_write_config;
 990    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
 991    k->device_id = PCI_DEVICE_ID_IVSHMEM;
 992    k->class_id = PCI_CLASS_MEMORY_RAM;
 993    k->revision = 1;
 994    dc->reset = ivshmem_reset;
 995    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 996    dc->desc = "Inter-VM shared memory";
 997}
 998
/* Abstract base type; concrete flavours are ivshmem-plain/-doorbell. */
static const TypeInfo ivshmem_common_info = {
    .name          = TYPE_IVSHMEM_COMMON,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract      = true,
    .class_init    = ivshmem_common_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1010
/* Migration state for ivshmem-plain: PCI core plus the two registers. */
static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};
1024
static Property ivshmem_plain_properties[] = {
    /* Whether this guest is the migratable owner of the shared memory */
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    /* Host memory backend that provides the shared memory region */
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_END_OF_LIST(),
};
1031
1032static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
1033{
1034    IVShmemState *s = IVSHMEM_COMMON(dev);
1035
1036    if (!s->hostmem) {
1037        error_setg(errp, "You must specify a 'memdev'");
1038        return;
1039    } else if (host_memory_backend_is_mapped(s->hostmem)) {
1040        error_setg(errp, "can't use already busy memdev: %s",
1041                   object_get_canonical_path_component(OBJECT(s->hostmem)));
1042        return;
1043    }
1044
1045    ivshmem_common_realize(dev, errp);
1046}
1047
1048static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1049{
1050    DeviceClass *dc = DEVICE_CLASS(klass);
1051    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1052
1053    k->realize = ivshmem_plain_realize;
1054    device_class_set_props(dc, ivshmem_plain_properties);
1055    dc->vmsd = &ivshmem_plain_vmsd;
1056}
1057
/* Concrete type: shared memory supplied by a host memory backend. */
static const TypeInfo ivshmem_plain_info = {
    .name          = TYPE_IVSHMEM_PLAIN,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init    = ivshmem_plain_class_init,
};
1064
/*
 * Migration state for ivshmem-doorbell: same as the plain variant plus
 * the MSI-X table state, since doorbell devices interrupt via MSI-X.
 */
static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};
1079
static Property ivshmem_doorbell_properties[] = {
    /* Connection to the ivshmem server that distributes fds and peer IDs */
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    /* Number of MSI-X vectors to expose (default 1) */
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    /* Use ioeventfd-backed doorbell notifications (default on) */
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};
1088
1089static void ivshmem_doorbell_init(Object *obj)
1090{
1091    IVShmemState *s = IVSHMEM_DOORBELL(obj);
1092
1093    s->features |= (1 << IVSHMEM_MSI);
1094}
1095
1096static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
1097{
1098    IVShmemState *s = IVSHMEM_COMMON(dev);
1099
1100    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
1101        error_setg(errp, "You must specify a 'chardev'");
1102        return;
1103    }
1104
1105    ivshmem_common_realize(dev, errp);
1106}
1107
1108static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1109{
1110    DeviceClass *dc = DEVICE_CLASS(klass);
1111    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1112
1113    k->realize = ivshmem_doorbell_realize;
1114    device_class_set_props(dc, ivshmem_doorbell_properties);
1115    dc->vmsd = &ivshmem_doorbell_vmsd;
1116}
1117
/* Concrete type: shared memory and interrupts brokered by an ivshmem server. */
static const TypeInfo ivshmem_doorbell_info = {
    .name          = TYPE_IVSHMEM_DOORBELL,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init    = ivshmem_doorbell_class_init,
};
1125
1126static void ivshmem_register_types(void)
1127{
1128    type_register_static(&ivshmem_common_info);
1129    type_register_static(&ivshmem_plain_info);
1130    type_register_static(&ivshmem_doorbell_info);
1131}
1132
/* Hook type registration into QEMU's module initialization machinery. */
type_init(ivshmem_register_types)