qemu/hw/misc/ivshmem.c
<<
>>
Prefs
   1/*
   2 * Inter-VM Shared Memory PCI device.
   3 *
   4 * Author:
   5 *      Cam Macdonell <cam@cs.ualberta.ca>
   6 *
   7 * Based On: cirrus_vga.c
   8 *          Copyright (c) 2004 Fabrice Bellard
   9 *          Copyright (c) 2004 Makoto Suzuki (suzu)
  10 *
  11 *      and rtl8139.c
  12 *          Copyright (c) 2006 Igor Kovalenko
  13 *
  14 * This code is licensed under the GNU GPL v2.
  15 *
  16 * Contributions after 2012-01-13 are licensed under the terms of the
  17 * GNU GPL, version 2 or (at your option) any later version.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/units.h"
  22#include "qapi/error.h"
  23#include "qemu/cutils.h"
  24#include "hw/pci/pci.h"
  25#include "hw/qdev-properties.h"
  26#include "hw/qdev-properties-system.h"
  27#include "hw/pci/msi.h"
  28#include "hw/pci/msix.h"
  29#include "sysemu/kvm.h"
  30#include "migration/blocker.h"
  31#include "migration/vmstate.h"
  32#include "qemu/error-report.h"
  33#include "qemu/event_notifier.h"
  34#include "qemu/module.h"
  35#include "qom/object_interfaces.h"
  36#include "chardev/char-fe.h"
  37#include "sysemu/hostmem.h"
  38#include "qapi/visitor.h"
  39
  40#include "hw/misc/ivshmem.h"
  41#include "qom/object.h"
  42
  43#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
  44#define PCI_DEVICE_ID_IVSHMEM   0x1110
  45
  46#define IVSHMEM_MAX_PEERS UINT16_MAX
  47#define IVSHMEM_IOEVENTFD   0
  48#define IVSHMEM_MSI     1
  49
  50#define IVSHMEM_REG_BAR_SIZE 0x100
  51
  52#define IVSHMEM_DEBUG 0
  53#define IVSHMEM_DPRINTF(fmt, ...)                       \
  54    do {                                                \
  55        if (IVSHMEM_DEBUG) {                            \
  56            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
  57        }                                               \
  58    } while (0)
  59
  60#define TYPE_IVSHMEM_COMMON "ivshmem-common"
  61typedef struct IVShmemState IVShmemState;
  62DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_COMMON,
  63                         TYPE_IVSHMEM_COMMON)
  64
  65#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
  66DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_PLAIN,
  67                         TYPE_IVSHMEM_PLAIN)
  68
  69#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
  70DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_DOORBELL,
  71                         TYPE_IVSHMEM_DOORBELL)
  72
  73#define TYPE_IVSHMEM "ivshmem"
  74DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM,
  75                         TYPE_IVSHMEM)
  76
  77typedef struct Peer {
  78    int nb_eventfds;
  79    EventNotifier *eventfds;
  80} Peer;
  81
  82typedef struct MSIVector {
  83    PCIDevice *pdev;
  84    int virq;
  85    bool unmasked;
  86} MSIVector;
  87
  88struct IVShmemState {
  89    /*< private >*/
  90    PCIDevice parent_obj;
  91    /*< public >*/
  92
  93    uint32_t features;
  94
  95    /* exactly one of these two may be set */
  96    HostMemoryBackend *hostmem; /* with interrupts */
  97    CharBackend server_chr; /* without interrupts */
  98
  99    /* registers */
 100    uint32_t intrmask;
 101    uint32_t intrstatus;
 102    int vm_id;
 103
 104    /* BARs */
 105    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
 106    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
 107    MemoryRegion server_bar2;   /* used with server_chr */
 108
 109    /* interrupt support */
 110    Peer *peers;
 111    int nb_peers;               /* space in @peers[] */
 112    uint32_t vectors;
 113    MSIVector *msi_vectors;
 114    uint64_t msg_buf;           /* buffer for receiving server messages */
 115    int msg_buffered_bytes;     /* #bytes in @msg_buf */
 116
 117    /* migration stuff */
 118    OnOffAuto master;
 119    Error *migration_blocker;
 120};
 121
 122/* registers for the Inter-VM shared memory device */
 123enum ivshmem_registers {
 124    INTRMASK = 0,
 125    INTRSTATUS = 4,
 126    IVPOSITION = 8,
 127    DOORBELL = 12,
 128};
 129
 130static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
 131                                                    unsigned int feature) {
 132    return (ivs->features & (1 << feature));
 133}
 134
 135static inline bool ivshmem_is_master(IVShmemState *s)
 136{
 137    assert(s->master != ON_OFF_AUTO_AUTO);
 138    return s->master == ON_OFF_AUTO_ON;
 139}
 140
 141static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
 142{
 143    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
 144
 145    s->intrmask = val;
 146}
 147
 148static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
 149{
 150    uint32_t ret = s->intrmask;
 151
 152    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
 153    return ret;
 154}
 155
 156static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
 157{
 158    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
 159
 160    s->intrstatus = val;
 161}
 162
 163static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
 164{
 165    uint32_t ret = s->intrstatus;
 166
 167    /* reading ISR clears all interrupts */
 168    s->intrstatus = 0;
 169    return ret;
 170}
 171
 172static void ivshmem_io_write(void *opaque, hwaddr addr,
 173                             uint64_t val, unsigned size)
 174{
 175    IVShmemState *s = opaque;
 176
 177    uint16_t dest = val >> 16;
 178    uint16_t vector = val & 0xff;
 179
 180    addr &= 0xfc;
 181
 182    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
 183    switch (addr)
 184    {
 185        case INTRMASK:
 186            ivshmem_IntrMask_write(s, val);
 187            break;
 188
 189        case INTRSTATUS:
 190            ivshmem_IntrStatus_write(s, val);
 191            break;
 192
 193        case DOORBELL:
 194            /* check that dest VM ID is reasonable */
 195            if (dest >= s->nb_peers) {
 196                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
 197                break;
 198            }
 199
 200            /* check doorbell range */
 201            if (vector < s->peers[dest].nb_eventfds) {
 202                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
 203                event_notifier_set(&s->peers[dest].eventfds[vector]);
 204            } else {
 205                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
 206                                vector, dest);
 207            }
 208            break;
 209        default:
 210            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
 211    }
 212}
 213
 214static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
 215                                unsigned size)
 216{
 217
 218    IVShmemState *s = opaque;
 219    uint32_t ret;
 220
 221    switch (addr)
 222    {
 223        case INTRMASK:
 224            ret = ivshmem_IntrMask_read(s);
 225            break;
 226
 227        case INTRSTATUS:
 228            ret = ivshmem_IntrStatus_read(s);
 229            break;
 230
 231        case IVPOSITION:
 232            ret = s->vm_id;
 233            break;
 234
 235        default:
 236            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
 237            ret = 0;
 238    }
 239
 240    return ret;
 241}
 242
 243static const MemoryRegionOps ivshmem_mmio_ops = {
 244    .read = ivshmem_io_read,
 245    .write = ivshmem_io_write,
 246    .endianness = DEVICE_NATIVE_ENDIAN,
 247    .impl = {
 248        .min_access_size = 4,
 249        .max_access_size = 4,
 250    },
 251};
 252
 253static void ivshmem_vector_notify(void *opaque)
 254{
 255    MSIVector *entry = opaque;
 256    PCIDevice *pdev = entry->pdev;
 257    IVShmemState *s = IVSHMEM_COMMON(pdev);
 258    int vector = entry - s->msi_vectors;
 259    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 260
 261    if (!event_notifier_test_and_clear(n)) {
 262        return;
 263    }
 264
 265    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
 266    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 267        if (msix_enabled(pdev)) {
 268            msix_notify(pdev, vector);
 269        }
 270    } else {
 271        ivshmem_IntrStatus_write(s, 1);
 272    }
 273}
 274
 275static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
 276                                 MSIMessage msg)
 277{
 278    IVShmemState *s = IVSHMEM_COMMON(dev);
 279    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 280    MSIVector *v = &s->msi_vectors[vector];
 281    int ret;
 282
 283    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
 284    if (!v->pdev) {
 285        error_report("ivshmem: vector %d route does not exist", vector);
 286        return -EINVAL;
 287    }
 288    assert(!v->unmasked);
 289
 290    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
 291    if (ret < 0) {
 292        return ret;
 293    }
 294    kvm_irqchip_commit_routes(kvm_state);
 295
 296    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
 297    if (ret < 0) {
 298        return ret;
 299    }
 300    v->unmasked = true;
 301
 302    return 0;
 303}
 304
 305static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
 306{
 307    IVShmemState *s = IVSHMEM_COMMON(dev);
 308    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 309    MSIVector *v = &s->msi_vectors[vector];
 310    int ret;
 311
 312    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
 313    if (!v->pdev) {
 314        error_report("ivshmem: vector %d route does not exist", vector);
 315        return;
 316    }
 317    assert(v->unmasked);
 318
 319    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
 320    if (ret < 0) {
 321        error_report("remove_irqfd_notifier_gsi failed");
 322        return;
 323    }
 324    v->unmasked = false;
 325}
 326
 327static void ivshmem_vector_poll(PCIDevice *dev,
 328                                unsigned int vector_start,
 329                                unsigned int vector_end)
 330{
 331    IVShmemState *s = IVSHMEM_COMMON(dev);
 332    unsigned int vector;
 333
 334    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
 335
 336    vector_end = MIN(vector_end, s->vectors);
 337
 338    for (vector = vector_start; vector < vector_end; vector++) {
 339        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
 340
 341        if (!msix_is_masked(dev, vector)) {
 342            continue;
 343        }
 344
 345        if (event_notifier_test_and_clear(notifier)) {
 346            msix_set_pending(dev, vector);
 347        }
 348    }
 349}
 350
 351static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
 352                                 int vector)
 353{
 354    int eventfd = event_notifier_get_fd(n);
 355
 356    assert(!s->msi_vectors[vector].pdev);
 357    s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 358
 359    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
 360                        NULL, &s->msi_vectors[vector]);
 361}
 362
 363static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
 364{
 365    memory_region_add_eventfd(&s->ivshmem_mmio,
 366                              DOORBELL,
 367                              4,
 368                              true,
 369                              (posn << 16) | i,
 370                              &s->peers[posn].eventfds[i]);
 371}
 372
 373static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
 374{
 375    memory_region_del_eventfd(&s->ivshmem_mmio,
 376                              DOORBELL,
 377                              4,
 378                              true,
 379                              (posn << 16) | i,
 380                              &s->peers[posn].eventfds[i]);
 381}
 382
 383static void close_peer_eventfds(IVShmemState *s, int posn)
 384{
 385    int i, n;
 386
 387    assert(posn >= 0 && posn < s->nb_peers);
 388    n = s->peers[posn].nb_eventfds;
 389
 390    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 391        memory_region_transaction_begin();
 392        for (i = 0; i < n; i++) {
 393            ivshmem_del_eventfd(s, posn, i);
 394        }
 395        memory_region_transaction_commit();
 396    }
 397
 398    for (i = 0; i < n; i++) {
 399        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
 400    }
 401
 402    g_free(s->peers[posn].eventfds);
 403    s->peers[posn].nb_eventfds = 0;
 404}
 405
 406static void resize_peers(IVShmemState *s, int nb_peers)
 407{
 408    int old_nb_peers = s->nb_peers;
 409    int i;
 410
 411    assert(nb_peers > old_nb_peers);
 412    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
 413
 414    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
 415    s->nb_peers = nb_peers;
 416
 417    for (i = old_nb_peers; i < nb_peers; i++) {
 418        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
 419        s->peers[i].nb_eventfds = 0;
 420    }
 421}
 422
 423static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
 424                                     Error **errp)
 425{
 426    PCIDevice *pdev = PCI_DEVICE(s);
 427    int ret;
 428
 429    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
 430    assert(!s->msi_vectors[vector].pdev);
 431
 432    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
 433    if (ret < 0) {
 434        error_setg(errp, "kvm_irqchip_add_msi_route failed");
 435        return;
 436    }
 437
 438    s->msi_vectors[vector].virq = ret;
 439    s->msi_vectors[vector].pdev = pdev;
 440}
 441
 442static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
 443{
 444    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 445    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
 446        ivshmem_has_feature(s, IVSHMEM_MSI);
 447    PCIDevice *pdev = PCI_DEVICE(s);
 448    Error *err = NULL;
 449
 450    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
 451
 452    if (!with_irqfd) {
 453        IVSHMEM_DPRINTF("with eventfd\n");
 454        watch_vector_notifier(s, n, vector);
 455    } else if (msix_enabled(pdev)) {
 456        IVSHMEM_DPRINTF("with irqfd\n");
 457        ivshmem_add_kvm_msi_virq(s, vector, &err);
 458        if (err) {
 459            error_propagate(errp, err);
 460            return;
 461        }
 462
 463        if (!msix_is_masked(pdev, vector)) {
 464            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
 465                                               s->msi_vectors[vector].virq);
 466            /* TODO handle error */
 467        }
 468    } else {
 469        /* it will be delayed until msix is enabled, in write_config */
 470        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
 471    }
 472}
 473
 474static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 475{
 476    Error *local_err = NULL;
 477    struct stat buf;
 478    size_t size;
 479
 480    if (s->ivshmem_bar2) {
 481        error_setg(errp, "server sent unexpected shared memory message");
 482        close(fd);
 483        return;
 484    }
 485
 486    if (fstat(fd, &buf) < 0) {
 487        error_setg_errno(errp, errno,
 488            "can't determine size of shared memory sent by server");
 489        close(fd);
 490        return;
 491    }
 492
 493    size = buf.st_size;
 494
 495    /* mmap the region and map into the BAR2 */
 496    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
 497                                   "ivshmem.bar2", size, true, fd, 0,
 498                                   &local_err);
 499    if (local_err) {
 500        error_propagate(errp, local_err);
 501        return;
 502    }
 503
 504    s->ivshmem_bar2 = &s->server_bar2;
 505}
 506
 507static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
 508                                   Error **errp)
 509{
 510    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
 511    if (posn >= s->nb_peers || posn == s->vm_id) {
 512        error_setg(errp, "invalid peer %d", posn);
 513        return;
 514    }
 515    close_peer_eventfds(s, posn);
 516}
 517
 518static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
 519                                Error **errp)
 520{
 521    Peer *peer = &s->peers[posn];
 522    int vector;
 523
 524    /*
 525     * The N-th connect message for this peer comes with the file
 526     * descriptor for vector N-1.  Count messages to find the vector.
 527     */
 528    if (peer->nb_eventfds >= s->vectors) {
 529        error_setg(errp, "Too many eventfd received, device has %d vectors",
 530                   s->vectors);
 531        close(fd);
 532        return;
 533    }
 534    vector = peer->nb_eventfds++;
 535
 536    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
 537    event_notifier_init_fd(&peer->eventfds[vector], fd);
 538    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
 539
 540    if (posn == s->vm_id) {
 541        setup_interrupt(s, vector, errp);
 542        /* TODO do we need to handle the error? */
 543    }
 544
 545    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 546        ivshmem_add_eventfd(s, posn, vector);
 547    }
 548}
 549
 550static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 551{
 552    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
 553
 554    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
 555        error_setg(errp, "server sent invalid message %" PRId64, msg);
 556        close(fd);
 557        return;
 558    }
 559
 560    if (msg == -1) {
 561        process_msg_shmem(s, fd, errp);
 562        return;
 563    }
 564
 565    if (msg >= s->nb_peers) {
 566        resize_peers(s, msg + 1);
 567    }
 568
 569    if (fd >= 0) {
 570        process_msg_connect(s, msg, fd, errp);
 571    } else {
 572        process_msg_disconnect(s, msg, errp);
 573    }
 574}
 575
 576static int ivshmem_can_receive(void *opaque)
 577{
 578    IVShmemState *s = opaque;
 579
 580    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
 581    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
 582}
 583
 584static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 585{
 586    IVShmemState *s = opaque;
 587    Error *err = NULL;
 588    int fd;
 589    int64_t msg;
 590
 591    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
 592    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
 593    s->msg_buffered_bytes += size;
 594    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
 595        return;
 596    }
 597    msg = le64_to_cpu(s->msg_buf);
 598    s->msg_buffered_bytes = 0;
 599
 600    fd = qemu_chr_fe_get_msgfd(&s->server_chr);
 601
 602    process_msg(s, msg, fd, &err);
 603    if (err) {
 604        error_report_err(err);
 605    }
 606}
 607
 608static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
 609{
 610    int64_t msg;
 611    int n, ret;
 612
 613    n = 0;
 614    do {
 615        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
 616                                   sizeof(msg) - n);
 617        if (ret < 0) {
 618            if (ret == -EINTR) {
 619                continue;
 620            }
 621            error_setg_errno(errp, -ret, "read from server failed");
 622            return INT64_MIN;
 623        }
 624        n += ret;
 625    } while (n < sizeof(msg));
 626
 627    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
 628    return le64_to_cpu(msg);
 629}
 630
 631static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
 632{
 633    Error *err = NULL;
 634    int64_t msg;
 635    int fd;
 636
 637    msg = ivshmem_recv_msg(s, &fd, &err);
 638    if (err) {
 639        error_propagate(errp, err);
 640        return;
 641    }
 642    if (msg != IVSHMEM_PROTOCOL_VERSION) {
 643        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
 644                   msg, IVSHMEM_PROTOCOL_VERSION);
 645        return;
 646    }
 647    if (fd != -1) {
 648        error_setg(errp, "server sent invalid version message");
 649        return;
 650    }
 651
 652    /*
 653     * ivshmem-server sends the remaining initial messages in a fixed
 654     * order, but the device has always accepted them in any order.
 655     * Stay as compatible as practical, just in case people use
 656     * servers that behave differently.
 657     */
 658
 659    /*
 660     * ivshmem_device_spec.txt has always required the ID message
 661     * right here, and ivshmem-server has always complied.  However,
 662     * older versions of the device accepted it out of order, but
 663     * broke when an interrupt setup message arrived before it.
 664     */
 665    msg = ivshmem_recv_msg(s, &fd, &err);
 666    if (err) {
 667        error_propagate(errp, err);
 668        return;
 669    }
 670    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
 671        error_setg(errp, "server sent invalid ID message");
 672        return;
 673    }
 674    s->vm_id = msg;
 675
 676    /*
 677     * Receive more messages until we got shared memory.
 678     */
 679    do {
 680        msg = ivshmem_recv_msg(s, &fd, &err);
 681        if (err) {
 682            error_propagate(errp, err);
 683            return;
 684        }
 685        process_msg(s, msg, fd, &err);
 686        if (err) {
 687            error_propagate(errp, err);
 688            return;
 689        }
 690    } while (msg != -1);
 691
 692    /*
 693     * This function must either map the shared memory or fail.  The
 694     * loop above ensures that: it terminates normally only after it
 695     * successfully processed the server's shared memory message.
 696     * Assert that actually mapped the shared memory:
 697     */
 698    assert(s->ivshmem_bar2);
 699}
 700
 701/* Select the MSI-X vectors used by device.
 702 * ivshmem maps events to vectors statically, so
 703 * we just enable all vectors on init and after reset. */
 704static void ivshmem_msix_vector_use(IVShmemState *s)
 705{
 706    PCIDevice *d = PCI_DEVICE(s);
 707    int i;
 708
 709    for (i = 0; i < s->vectors; i++) {
 710        msix_vector_use(d, i);
 711    }
 712}
 713
 714static void ivshmem_disable_irqfd(IVShmemState *s);
 715
 716static void ivshmem_reset(DeviceState *d)
 717{
 718    IVShmemState *s = IVSHMEM_COMMON(d);
 719
 720    ivshmem_disable_irqfd(s);
 721
 722    s->intrstatus = 0;
 723    s->intrmask = 0;
 724    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 725        ivshmem_msix_vector_use(s);
 726    }
 727}
 728
 729static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
 730{
 731    /* allocate QEMU callback data for receiving interrupts */
 732    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 733
 734    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 735        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
 736            return -1;
 737        }
 738
 739        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 740        ivshmem_msix_vector_use(s);
 741    }
 742
 743    return 0;
 744}
 745
 746static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
 747{
 748    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
 749
 750    if (s->msi_vectors[vector].pdev == NULL) {
 751        return;
 752    }
 753
 754    /* it was cleaned when masked in the frontend. */
 755    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
 756
 757    s->msi_vectors[vector].pdev = NULL;
 758}
 759
 760static void ivshmem_enable_irqfd(IVShmemState *s)
 761{
 762    PCIDevice *pdev = PCI_DEVICE(s);
 763    int i;
 764
 765    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 766        Error *err = NULL;
 767
 768        ivshmem_add_kvm_msi_virq(s, i, &err);
 769        if (err) {
 770            error_report_err(err);
 771            goto undo;
 772        }
 773    }
 774
 775    if (msix_set_vector_notifiers(pdev,
 776                                  ivshmem_vector_unmask,
 777                                  ivshmem_vector_mask,
 778                                  ivshmem_vector_poll)) {
 779        error_report("ivshmem: msix_set_vector_notifiers failed");
 780        goto undo;
 781    }
 782    return;
 783
 784undo:
 785    while (--i >= 0) {
 786        ivshmem_remove_kvm_msi_virq(s, i);
 787    }
 788}
 789
 790static void ivshmem_disable_irqfd(IVShmemState *s)
 791{
 792    PCIDevice *pdev = PCI_DEVICE(s);
 793    int i;
 794
 795    if (!pdev->msix_vector_use_notifier) {
 796        return;
 797    }
 798
 799    msix_unset_vector_notifiers(pdev);
 800
 801    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 802        /*
 803         * MSI-X is already disabled here so msix_unset_vector_notifiers()
 804         * didn't call our release notifier.  Do it now to keep our masks and
 805         * unmasks balanced.
 806         */
 807        if (s->msi_vectors[i].unmasked) {
 808            ivshmem_vector_mask(pdev, i);
 809        }
 810        ivshmem_remove_kvm_msi_virq(s, i);
 811    }
 812
 813}
 814
 815static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
 816                                 uint32_t val, int len)
 817{
 818    IVShmemState *s = IVSHMEM_COMMON(pdev);
 819    int is_enabled, was_enabled = msix_enabled(pdev);
 820
 821    pci_default_write_config(pdev, address, val, len);
 822    is_enabled = msix_enabled(pdev);
 823
 824    if (kvm_msi_via_irqfd_enabled()) {
 825        if (!was_enabled && is_enabled) {
 826            ivshmem_enable_irqfd(s);
 827        } else if (was_enabled && !is_enabled) {
 828            ivshmem_disable_irqfd(s);
 829        }
 830    }
 831}
 832
 833static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
 834{
 835    IVShmemState *s = IVSHMEM_COMMON(dev);
 836    Error *err = NULL;
 837    uint8_t *pci_conf;
 838
 839    /* IRQFD requires MSI */
 840    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
 841        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
 842        error_setg(errp, "ioeventfd/irqfd requires MSI");
 843        return;
 844    }
 845
 846    pci_conf = dev->config;
 847    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
 848
 849    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
 850                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
 851
 852    /* region for registers*/
 853    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
 854                     &s->ivshmem_mmio);
 855
 856    if (s->hostmem != NULL) {
 857        IVSHMEM_DPRINTF("using hostmem\n");
 858
 859        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
 860        host_memory_backend_set_mapped(s->hostmem, true);
 861    } else {
 862        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
 863        assert(chr);
 864
 865        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
 866                        chr->filename);
 867
 868        /* we allocate enough space for 16 peers and grow as needed */
 869        resize_peers(s, 16);
 870
 871        /*
 872         * Receive setup messages from server synchronously.
 873         * Older versions did it asynchronously, but that creates a
 874         * number of entertaining race conditions.
 875         */
 876        ivshmem_recv_setup(s, &err);
 877        if (err) {
 878            error_propagate(errp, err);
 879            return;
 880        }
 881
 882        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
 883            error_setg(errp,
 884                       "master must connect to the server before any peers");
 885            return;
 886        }
 887
 888        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
 889                                 ivshmem_read, NULL, NULL, s, NULL, true);
 890
 891        if (ivshmem_setup_interrupts(s, errp) < 0) {
 892            error_prepend(errp, "Failed to initialize interrupts: ");
 893            return;
 894        }
 895    }
 896
 897    if (s->master == ON_OFF_AUTO_AUTO) {
 898        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 899    }
 900
 901    if (!ivshmem_is_master(s)) {
 902        error_setg(&s->migration_blocker,
 903                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
 904        if (migrate_add_blocker(s->migration_blocker, errp) < 0) {
 905            error_free(s->migration_blocker);
 906            return;
 907        }
 908    }
 909
 910    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
 911    pci_register_bar(PCI_DEVICE(s), 2,
 912                     PCI_BASE_ADDRESS_SPACE_MEMORY |
 913                     PCI_BASE_ADDRESS_MEM_PREFETCH |
 914                     PCI_BASE_ADDRESS_MEM_TYPE_64,
 915                     s->ivshmem_bar2);
 916}
 917
 918static void ivshmem_exit(PCIDevice *dev)
 919{
 920    IVShmemState *s = IVSHMEM_COMMON(dev);
 921    int i;
 922
 923    if (s->migration_blocker) {
 924        migrate_del_blocker(s->migration_blocker);
 925        error_free(s->migration_blocker);
 926    }
 927
 928    if (memory_region_is_mapped(s->ivshmem_bar2)) {
 929        if (!s->hostmem) {
 930            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
 931            int fd;
 932
 933            if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
 934                error_report("Failed to munmap shared memory %s",
 935                             strerror(errno));
 936            }
 937
 938            fd = memory_region_get_fd(s->ivshmem_bar2);
 939            close(fd);
 940        }
 941
 942        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
 943    }
 944
 945    if (s->hostmem) {
 946        host_memory_backend_set_mapped(s->hostmem, false);
 947    }
 948
 949    if (s->peers) {
 950        for (i = 0; i < s->nb_peers; i++) {
 951            close_peer_eventfds(s, i);
 952        }
 953        g_free(s->peers);
 954    }
 955
 956    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 957        msix_uninit_exclusive_bar(dev);
 958    }
 959
 960    g_free(s->msi_vectors);
 961}
 962
 963static int ivshmem_pre_load(void *opaque)
 964{
 965    IVShmemState *s = opaque;
 966
 967    if (!ivshmem_is_master(s)) {
 968        error_report("'peer' devices are not migratable");
 969        return -EINVAL;
 970    }
 971
 972    return 0;
 973}
 974
 975static int ivshmem_post_load(void *opaque, int version_id)
 976{
 977    IVShmemState *s = opaque;
 978
 979    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 980        ivshmem_msix_vector_use(s);
 981    }
 982    return 0;
 983}
 984
 985static void ivshmem_common_class_init(ObjectClass *klass, void *data)
 986{
 987    DeviceClass *dc = DEVICE_CLASS(klass);
 988    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 989
 990    k->realize = ivshmem_common_realize;
 991    k->exit = ivshmem_exit;
 992    k->config_write = ivshmem_write_config;
 993    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
 994    k->device_id = PCI_DEVICE_ID_IVSHMEM;
 995    k->class_id = PCI_CLASS_MEMORY_RAM;
 996    k->revision = 1;
 997    dc->reset = ivshmem_reset;
 998    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 999    dc->desc = "Inter-VM shared memory";
1000}
1001
1002static const TypeInfo ivshmem_common_info = {
1003    .name          = TYPE_IVSHMEM_COMMON,
1004    .parent        = TYPE_PCI_DEVICE,
1005    .instance_size = sizeof(IVShmemState),
1006    .abstract      = true,
1007    .class_init    = ivshmem_common_class_init,
1008    .interfaces = (InterfaceInfo[]) {
1009        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1010        { },
1011    },
1012};
1013
1014static const VMStateDescription ivshmem_plain_vmsd = {
1015    .name = TYPE_IVSHMEM_PLAIN,
1016    .version_id = 0,
1017    .minimum_version_id = 0,
1018    .pre_load = ivshmem_pre_load,
1019    .post_load = ivshmem_post_load,
1020    .fields = (VMStateField[]) {
1021        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1022        VMSTATE_UINT32(intrstatus, IVShmemState),
1023        VMSTATE_UINT32(intrmask, IVShmemState),
1024        VMSTATE_END_OF_LIST()
1025    },
1026};
1027
1028static Property ivshmem_plain_properties[] = {
1029    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1030    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
1031                     HostMemoryBackend *),
1032    DEFINE_PROP_END_OF_LIST(),
1033};
1034
1035static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
1036{
1037    IVShmemState *s = IVSHMEM_COMMON(dev);
1038
1039    if (!s->hostmem) {
1040        error_setg(errp, "You must specify a 'memdev'");
1041        return;
1042    } else if (host_memory_backend_is_mapped(s->hostmem)) {
1043        error_setg(errp, "can't use already busy memdev: %s",
1044                   object_get_canonical_path_component(OBJECT(s->hostmem)));
1045        return;
1046    }
1047
1048    ivshmem_common_realize(dev, errp);
1049}
1050
1051static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1052{
1053    DeviceClass *dc = DEVICE_CLASS(klass);
1054    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1055
1056    k->realize = ivshmem_plain_realize;
1057    device_class_set_props(dc, ivshmem_plain_properties);
1058    dc->vmsd = &ivshmem_plain_vmsd;
1059}
1060
1061static const TypeInfo ivshmem_plain_info = {
1062    .name          = TYPE_IVSHMEM_PLAIN,
1063    .parent        = TYPE_IVSHMEM_COMMON,
1064    .instance_size = sizeof(IVShmemState),
1065    .class_init    = ivshmem_plain_class_init,
1066};
1067
1068static const VMStateDescription ivshmem_doorbell_vmsd = {
1069    .name = TYPE_IVSHMEM_DOORBELL,
1070    .version_id = 0,
1071    .minimum_version_id = 0,
1072    .pre_load = ivshmem_pre_load,
1073    .post_load = ivshmem_post_load,
1074    .fields = (VMStateField[]) {
1075        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1076        VMSTATE_MSIX(parent_obj, IVShmemState),
1077        VMSTATE_UINT32(intrstatus, IVShmemState),
1078        VMSTATE_UINT32(intrmask, IVShmemState),
1079        VMSTATE_END_OF_LIST()
1080    },
1081};
1082
1083static Property ivshmem_doorbell_properties[] = {
1084    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
1085    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
1086    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
1087                    true),
1088    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1089    DEFINE_PROP_END_OF_LIST(),
1090};
1091
1092static void ivshmem_doorbell_init(Object *obj)
1093{
1094    IVShmemState *s = IVSHMEM_DOORBELL(obj);
1095
1096    s->features |= (1 << IVSHMEM_MSI);
1097}
1098
1099static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
1100{
1101    IVShmemState *s = IVSHMEM_COMMON(dev);
1102
1103    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
1104        error_setg(errp, "You must specify a 'chardev'");
1105        return;
1106    }
1107
1108    ivshmem_common_realize(dev, errp);
1109}
1110
1111static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1112{
1113    DeviceClass *dc = DEVICE_CLASS(klass);
1114    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1115
1116    k->realize = ivshmem_doorbell_realize;
1117    device_class_set_props(dc, ivshmem_doorbell_properties);
1118    dc->vmsd = &ivshmem_doorbell_vmsd;
1119}
1120
1121static const TypeInfo ivshmem_doorbell_info = {
1122    .name          = TYPE_IVSHMEM_DOORBELL,
1123    .parent        = TYPE_IVSHMEM_COMMON,
1124    .instance_size = sizeof(IVShmemState),
1125    .instance_init = ivshmem_doorbell_init,
1126    .class_init    = ivshmem_doorbell_class_init,
1127};
1128
1129static void ivshmem_register_types(void)
1130{
1131    type_register_static(&ivshmem_common_info);
1132    type_register_static(&ivshmem_plain_info);
1133    type_register_static(&ivshmem_doorbell_info);
1134}
1135
1136type_init(ivshmem_register_types)
1137