qemu/hw/misc/ivshmem.c
<<
>>
Prefs
   1/*
   2 * Inter-VM Shared Memory PCI device.
   3 *
   4 * Author:
   5 *      Cam Macdonell <cam@cs.ualberta.ca>
   6 *
   7 * Based On: cirrus_vga.c
   8 *          Copyright (c) 2004 Fabrice Bellard
   9 *          Copyright (c) 2004 Makoto Suzuki (suzu)
  10 *
  11 *      and rtl8139.c
  12 *          Copyright (c) 2006 Igor Kovalenko
  13 *
  14 * This code is licensed under the GNU GPL v2.
  15 *
  16 * Contributions after 2012-01-13 are licensed under the terms of the
  17 * GNU GPL, version 2 or (at your option) any later version.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/units.h"
  22#include "qapi/error.h"
  23#include "qemu/cutils.h"
  24#include "hw/pci/pci.h"
  25#include "hw/qdev-properties.h"
  26#include "hw/qdev-properties-system.h"
  27#include "hw/pci/msi.h"
  28#include "hw/pci/msix.h"
  29#include "sysemu/kvm.h"
  30#include "migration/blocker.h"
  31#include "migration/vmstate.h"
  32#include "qemu/error-report.h"
  33#include "qemu/event_notifier.h"
  34#include "qemu/module.h"
  35#include "qom/object_interfaces.h"
  36#include "chardev/char-fe.h"
  37#include "sysemu/hostmem.h"
  38#include "qapi/visitor.h"
  39
  40#include "hw/misc/ivshmem.h"
  41#include "qom/object.h"
  42
  43#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
  44#define PCI_DEVICE_ID_IVSHMEM   0x1110
  45
  46#define IVSHMEM_MAX_PEERS UINT16_MAX
  47#define IVSHMEM_IOEVENTFD   0
  48#define IVSHMEM_MSI     1
  49
  50#define IVSHMEM_REG_BAR_SIZE 0x100
  51
  52#define IVSHMEM_DEBUG 0
  53#define IVSHMEM_DPRINTF(fmt, ...)                       \
  54    do {                                                \
  55        if (IVSHMEM_DEBUG) {                            \
  56            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
  57        }                                               \
  58    } while (0)
  59
  60#define TYPE_IVSHMEM_COMMON "ivshmem-common"
  61typedef struct IVShmemState IVShmemState;
  62DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_COMMON,
  63                         TYPE_IVSHMEM_COMMON)
  64
  65#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
  66DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_PLAIN,
  67                         TYPE_IVSHMEM_PLAIN)
  68
  69#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
  70DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_DOORBELL,
  71                         TYPE_IVSHMEM_DOORBELL)
  72
  73#define TYPE_IVSHMEM "ivshmem"
  74DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM,
  75                         TYPE_IVSHMEM)
  76
  77typedef struct Peer {
  78    int nb_eventfds;
  79    EventNotifier *eventfds;
  80} Peer;
  81
  82typedef struct MSIVector {
  83    PCIDevice *pdev;
  84    int virq;
  85    bool unmasked;
  86} MSIVector;
  87
  88struct IVShmemState {
  89    /*< private >*/
  90    PCIDevice parent_obj;
  91    /*< public >*/
  92
  93    uint32_t features;
  94
  95    /* exactly one of these two may be set */
  96    HostMemoryBackend *hostmem; /* with interrupts */
  97    CharBackend server_chr; /* without interrupts */
  98
  99    /* registers */
 100    uint32_t intrmask;
 101    uint32_t intrstatus;
 102    int vm_id;
 103
 104    /* BARs */
 105    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
 106    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
 107    MemoryRegion server_bar2;   /* used with server_chr */
 108
 109    /* interrupt support */
 110    Peer *peers;
 111    int nb_peers;               /* space in @peers[] */
 112    uint32_t vectors;
 113    MSIVector *msi_vectors;
 114    uint64_t msg_buf;           /* buffer for receiving server messages */
 115    int msg_buffered_bytes;     /* #bytes in @msg_buf */
 116
 117    /* migration stuff */
 118    OnOffAuto master;
 119    Error *migration_blocker;
 120};
 121
 122/* registers for the Inter-VM shared memory device */
 123enum ivshmem_registers {
 124    INTRMASK = 0,
 125    INTRSTATUS = 4,
 126    IVPOSITION = 8,
 127    DOORBELL = 12,
 128};
 129
 130static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
 131                                                    unsigned int feature) {
 132    return (ivs->features & (1 << feature));
 133}
 134
 135static inline bool ivshmem_is_master(IVShmemState *s)
 136{
 137    assert(s->master != ON_OFF_AUTO_AUTO);
 138    return s->master == ON_OFF_AUTO_ON;
 139}
 140
 141static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
 142{
 143    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
 144
 145    s->intrmask = val;
 146}
 147
 148static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
 149{
 150    uint32_t ret = s->intrmask;
 151
 152    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
 153    return ret;
 154}
 155
 156static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
 157{
 158    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
 159
 160    s->intrstatus = val;
 161}
 162
 163static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
 164{
 165    uint32_t ret = s->intrstatus;
 166
 167    /* reading ISR clears all interrupts */
 168    s->intrstatus = 0;
 169    return ret;
 170}
 171
 172static void ivshmem_io_write(void *opaque, hwaddr addr,
 173                             uint64_t val, unsigned size)
 174{
 175    IVShmemState *s = opaque;
 176
 177    uint16_t dest = val >> 16;
 178    uint16_t vector = val & 0xff;
 179
 180    addr &= 0xfc;
 181
 182    IVSHMEM_DPRINTF("writing to addr " HWADDR_FMT_plx "\n", addr);
 183    switch (addr)
 184    {
 185        case INTRMASK:
 186            ivshmem_IntrMask_write(s, val);
 187            break;
 188
 189        case INTRSTATUS:
 190            ivshmem_IntrStatus_write(s, val);
 191            break;
 192
 193        case DOORBELL:
 194            /* check that dest VM ID is reasonable */
 195            if (dest >= s->nb_peers) {
 196                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
 197                break;
 198            }
 199
 200            /* check doorbell range */
 201            if (vector < s->peers[dest].nb_eventfds) {
 202                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
 203                event_notifier_set(&s->peers[dest].eventfds[vector]);
 204            } else {
 205                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
 206                                vector, dest);
 207            }
 208            break;
 209        default:
 210            IVSHMEM_DPRINTF("Unhandled write " HWADDR_FMT_plx "\n", addr);
 211    }
 212}
 213
 214static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
 215                                unsigned size)
 216{
 217
 218    IVShmemState *s = opaque;
 219    uint32_t ret;
 220
 221    switch (addr)
 222    {
 223        case INTRMASK:
 224            ret = ivshmem_IntrMask_read(s);
 225            break;
 226
 227        case INTRSTATUS:
 228            ret = ivshmem_IntrStatus_read(s);
 229            break;
 230
 231        case IVPOSITION:
 232            ret = s->vm_id;
 233            break;
 234
 235        default:
 236            IVSHMEM_DPRINTF("why are we reading " HWADDR_FMT_plx "\n", addr);
 237            ret = 0;
 238    }
 239
 240    return ret;
 241}
 242
 243static const MemoryRegionOps ivshmem_mmio_ops = {
 244    .read = ivshmem_io_read,
 245    .write = ivshmem_io_write,
 246    .endianness = DEVICE_LITTLE_ENDIAN,
 247    .impl = {
 248        .min_access_size = 4,
 249        .max_access_size = 4,
 250    },
 251};
 252
 253static void ivshmem_vector_notify(void *opaque)
 254{
 255    MSIVector *entry = opaque;
 256    PCIDevice *pdev = entry->pdev;
 257    IVShmemState *s = IVSHMEM_COMMON(pdev);
 258    int vector = entry - s->msi_vectors;
 259    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 260
 261    if (!event_notifier_test_and_clear(n)) {
 262        return;
 263    }
 264
 265    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
 266    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 267        if (msix_enabled(pdev)) {
 268            msix_notify(pdev, vector);
 269        }
 270    } else {
 271        ivshmem_IntrStatus_write(s, 1);
 272    }
 273}
 274
 275static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
 276                                 MSIMessage msg)
 277{
 278    IVShmemState *s = IVSHMEM_COMMON(dev);
 279    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 280    MSIVector *v = &s->msi_vectors[vector];
 281    int ret;
 282
 283    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
 284    if (!v->pdev) {
 285        error_report("ivshmem: vector %d route does not exist", vector);
 286        return -EINVAL;
 287    }
 288    assert(!v->unmasked);
 289
 290    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
 291    if (ret < 0) {
 292        return ret;
 293    }
 294    kvm_irqchip_commit_routes(kvm_state);
 295
 296    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
 297    if (ret < 0) {
 298        return ret;
 299    }
 300    v->unmasked = true;
 301
 302    return 0;
 303}
 304
 305static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
 306{
 307    IVShmemState *s = IVSHMEM_COMMON(dev);
 308    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 309    MSIVector *v = &s->msi_vectors[vector];
 310    int ret;
 311
 312    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
 313    if (!v->pdev) {
 314        error_report("ivshmem: vector %d route does not exist", vector);
 315        return;
 316    }
 317    assert(v->unmasked);
 318
 319    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
 320    if (ret < 0) {
 321        error_report("remove_irqfd_notifier_gsi failed");
 322        return;
 323    }
 324    v->unmasked = false;
 325}
 326
 327static void ivshmem_vector_poll(PCIDevice *dev,
 328                                unsigned int vector_start,
 329                                unsigned int vector_end)
 330{
 331    IVShmemState *s = IVSHMEM_COMMON(dev);
 332    unsigned int vector;
 333
 334    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
 335
 336    vector_end = MIN(vector_end, s->vectors);
 337
 338    for (vector = vector_start; vector < vector_end; vector++) {
 339        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
 340
 341        if (!msix_is_masked(dev, vector)) {
 342            continue;
 343        }
 344
 345        if (event_notifier_test_and_clear(notifier)) {
 346            msix_set_pending(dev, vector);
 347        }
 348    }
 349}
 350
 351static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
 352                                 int vector)
 353{
 354    int eventfd = event_notifier_get_fd(n);
 355
 356    assert(!s->msi_vectors[vector].pdev);
 357    s->msi_vectors[vector].pdev = PCI_DEVICE(s);
 358
 359    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
 360                        NULL, &s->msi_vectors[vector]);
 361}
 362
 363static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
 364{
 365    memory_region_add_eventfd(&s->ivshmem_mmio,
 366                              DOORBELL,
 367                              4,
 368                              true,
 369                              (posn << 16) | i,
 370                              &s->peers[posn].eventfds[i]);
 371}
 372
 373static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
 374{
 375    memory_region_del_eventfd(&s->ivshmem_mmio,
 376                              DOORBELL,
 377                              4,
 378                              true,
 379                              (posn << 16) | i,
 380                              &s->peers[posn].eventfds[i]);
 381}
 382
 383static void close_peer_eventfds(IVShmemState *s, int posn)
 384{
 385    int i, n;
 386
 387    assert(posn >= 0 && posn < s->nb_peers);
 388    n = s->peers[posn].nb_eventfds;
 389
 390    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 391        memory_region_transaction_begin();
 392        for (i = 0; i < n; i++) {
 393            ivshmem_del_eventfd(s, posn, i);
 394        }
 395        memory_region_transaction_commit();
 396    }
 397
 398    for (i = 0; i < n; i++) {
 399        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
 400    }
 401
 402    g_free(s->peers[posn].eventfds);
 403    s->peers[posn].nb_eventfds = 0;
 404}
 405
 406static void resize_peers(IVShmemState *s, int nb_peers)
 407{
 408    int old_nb_peers = s->nb_peers;
 409    int i;
 410
 411    assert(nb_peers > old_nb_peers);
 412    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
 413
 414    s->peers = g_renew(Peer, s->peers, nb_peers);
 415    s->nb_peers = nb_peers;
 416
 417    for (i = old_nb_peers; i < nb_peers; i++) {
 418        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
 419        s->peers[i].nb_eventfds = 0;
 420    }
 421}
 422
 423static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
 424                                     Error **errp)
 425{
 426    PCIDevice *pdev = PCI_DEVICE(s);
 427    KVMRouteChange c;
 428    int ret;
 429
 430    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
 431    assert(!s->msi_vectors[vector].pdev);
 432
 433    c = kvm_irqchip_begin_route_changes(kvm_state);
 434    ret = kvm_irqchip_add_msi_route(&c, vector, pdev);
 435    if (ret < 0) {
 436        error_setg(errp, "kvm_irqchip_add_msi_route failed");
 437        return;
 438    }
 439    kvm_irqchip_commit_route_changes(&c);
 440
 441    s->msi_vectors[vector].virq = ret;
 442    s->msi_vectors[vector].pdev = pdev;
 443}
 444
 445static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
 446{
 447    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
 448    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
 449        ivshmem_has_feature(s, IVSHMEM_MSI);
 450    PCIDevice *pdev = PCI_DEVICE(s);
 451    Error *err = NULL;
 452
 453    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
 454
 455    if (!with_irqfd) {
 456        IVSHMEM_DPRINTF("with eventfd\n");
 457        watch_vector_notifier(s, n, vector);
 458    } else if (msix_enabled(pdev)) {
 459        IVSHMEM_DPRINTF("with irqfd\n");
 460        ivshmem_add_kvm_msi_virq(s, vector, &err);
 461        if (err) {
 462            error_propagate(errp, err);
 463            return;
 464        }
 465
 466        if (!msix_is_masked(pdev, vector)) {
 467            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
 468                                               s->msi_vectors[vector].virq);
 469            /* TODO handle error */
 470        }
 471    } else {
 472        /* it will be delayed until msix is enabled, in write_config */
 473        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
 474    }
 475}
 476
 477static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 478{
 479    Error *local_err = NULL;
 480    struct stat buf;
 481    size_t size;
 482
 483    if (s->ivshmem_bar2) {
 484        error_setg(errp, "server sent unexpected shared memory message");
 485        close(fd);
 486        return;
 487    }
 488
 489    if (fstat(fd, &buf) < 0) {
 490        error_setg_errno(errp, errno,
 491            "can't determine size of shared memory sent by server");
 492        close(fd);
 493        return;
 494    }
 495
 496    size = buf.st_size;
 497
 498    /* mmap the region and map into the BAR2 */
 499    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2",
 500                                   size, RAM_SHARED, fd, 0, &local_err);
 501    if (local_err) {
 502        error_propagate(errp, local_err);
 503        return;
 504    }
 505
 506    s->ivshmem_bar2 = &s->server_bar2;
 507}
 508
 509static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
 510                                   Error **errp)
 511{
 512    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
 513    if (posn >= s->nb_peers || posn == s->vm_id) {
 514        error_setg(errp, "invalid peer %d", posn);
 515        return;
 516    }
 517    close_peer_eventfds(s, posn);
 518}
 519
 520static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
 521                                Error **errp)
 522{
 523    Peer *peer = &s->peers[posn];
 524    int vector;
 525
 526    /*
 527     * The N-th connect message for this peer comes with the file
 528     * descriptor for vector N-1.  Count messages to find the vector.
 529     */
 530    if (peer->nb_eventfds >= s->vectors) {
 531        error_setg(errp, "Too many eventfd received, device has %d vectors",
 532                   s->vectors);
 533        close(fd);
 534        return;
 535    }
 536    vector = peer->nb_eventfds++;
 537
 538    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
 539    event_notifier_init_fd(&peer->eventfds[vector], fd);
 540    g_unix_set_fd_nonblocking(fd, true, NULL); /* msix/irqfd poll non block */
 541
 542    if (posn == s->vm_id) {
 543        setup_interrupt(s, vector, errp);
 544        /* TODO do we need to handle the error? */
 545    }
 546
 547    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
 548        ivshmem_add_eventfd(s, posn, vector);
 549    }
 550}
 551
 552static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 553{
 554    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
 555
 556    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
 557        error_setg(errp, "server sent invalid message %" PRId64, msg);
 558        close(fd);
 559        return;
 560    }
 561
 562    if (msg == -1) {
 563        process_msg_shmem(s, fd, errp);
 564        return;
 565    }
 566
 567    if (msg >= s->nb_peers) {
 568        resize_peers(s, msg + 1);
 569    }
 570
 571    if (fd >= 0) {
 572        process_msg_connect(s, msg, fd, errp);
 573    } else {
 574        process_msg_disconnect(s, msg, errp);
 575    }
 576}
 577
 578static int ivshmem_can_receive(void *opaque)
 579{
 580    IVShmemState *s = opaque;
 581
 582    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
 583    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
 584}
 585
 586static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 587{
 588    IVShmemState *s = opaque;
 589    Error *err = NULL;
 590    int fd;
 591    int64_t msg;
 592
 593    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
 594    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
 595    s->msg_buffered_bytes += size;
 596    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
 597        return;
 598    }
 599    msg = le64_to_cpu(s->msg_buf);
 600    s->msg_buffered_bytes = 0;
 601
 602    fd = qemu_chr_fe_get_msgfd(&s->server_chr);
 603
 604    process_msg(s, msg, fd, &err);
 605    if (err) {
 606        error_report_err(err);
 607    }
 608}
 609
 610static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
 611{
 612    int64_t msg;
 613    int n, ret;
 614
 615    n = 0;
 616    do {
 617        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
 618                                   sizeof(msg) - n);
 619        if (ret < 0) {
 620            if (ret == -EINTR) {
 621                continue;
 622            }
 623            error_setg_errno(errp, -ret, "read from server failed");
 624            return INT64_MIN;
 625        }
 626        n += ret;
 627    } while (n < sizeof(msg));
 628
 629    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
 630    return le64_to_cpu(msg);
 631}
 632
 633static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
 634{
 635    Error *err = NULL;
 636    int64_t msg;
 637    int fd;
 638
 639    msg = ivshmem_recv_msg(s, &fd, &err);
 640    if (err) {
 641        error_propagate(errp, err);
 642        return;
 643    }
 644    if (msg != IVSHMEM_PROTOCOL_VERSION) {
 645        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
 646                   msg, IVSHMEM_PROTOCOL_VERSION);
 647        return;
 648    }
 649    if (fd != -1) {
 650        error_setg(errp, "server sent invalid version message");
 651        return;
 652    }
 653
 654    /*
 655     * ivshmem-server sends the remaining initial messages in a fixed
 656     * order, but the device has always accepted them in any order.
 657     * Stay as compatible as practical, just in case people use
 658     * servers that behave differently.
 659     */
 660
 661    /*
 662     * ivshmem_device_spec.txt has always required the ID message
 663     * right here, and ivshmem-server has always complied.  However,
 664     * older versions of the device accepted it out of order, but
 665     * broke when an interrupt setup message arrived before it.
 666     */
 667    msg = ivshmem_recv_msg(s, &fd, &err);
 668    if (err) {
 669        error_propagate(errp, err);
 670        return;
 671    }
 672    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
 673        error_setg(errp, "server sent invalid ID message");
 674        return;
 675    }
 676    s->vm_id = msg;
 677
 678    /*
 679     * Receive more messages until we got shared memory.
 680     */
 681    do {
 682        msg = ivshmem_recv_msg(s, &fd, &err);
 683        if (err) {
 684            error_propagate(errp, err);
 685            return;
 686        }
 687        process_msg(s, msg, fd, &err);
 688        if (err) {
 689            error_propagate(errp, err);
 690            return;
 691        }
 692    } while (msg != -1);
 693
 694    /*
 695     * This function must either map the shared memory or fail.  The
 696     * loop above ensures that: it terminates normally only after it
 697     * successfully processed the server's shared memory message.
 698     * Assert that actually mapped the shared memory:
 699     */
 700    assert(s->ivshmem_bar2);
 701}
 702
 703/* Select the MSI-X vectors used by device.
 704 * ivshmem maps events to vectors statically, so
 705 * we just enable all vectors on init and after reset. */
 706static void ivshmem_msix_vector_use(IVShmemState *s)
 707{
 708    PCIDevice *d = PCI_DEVICE(s);
 709    int i;
 710
 711    for (i = 0; i < s->vectors; i++) {
 712        msix_vector_use(d, i);
 713    }
 714}
 715
 716static void ivshmem_disable_irqfd(IVShmemState *s);
 717
 718static void ivshmem_reset(DeviceState *d)
 719{
 720    IVShmemState *s = IVSHMEM_COMMON(d);
 721
 722    ivshmem_disable_irqfd(s);
 723
 724    s->intrstatus = 0;
 725    s->intrmask = 0;
 726    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 727        ivshmem_msix_vector_use(s);
 728    }
 729}
 730
 731static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
 732{
 733    /* allocate QEMU callback data for receiving interrupts */
 734    s->msi_vectors = g_new0(MSIVector, s->vectors);
 735
 736    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 737        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
 738            return -1;
 739        }
 740
 741        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
 742        ivshmem_msix_vector_use(s);
 743    }
 744
 745    return 0;
 746}
 747
 748static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
 749{
 750    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);
 751
 752    if (s->msi_vectors[vector].pdev == NULL) {
 753        return;
 754    }
 755
 756    /* it was cleaned when masked in the frontend. */
 757    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
 758
 759    s->msi_vectors[vector].pdev = NULL;
 760}
 761
 762static void ivshmem_enable_irqfd(IVShmemState *s)
 763{
 764    PCIDevice *pdev = PCI_DEVICE(s);
 765    int i;
 766
 767    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 768        Error *err = NULL;
 769
 770        ivshmem_add_kvm_msi_virq(s, i, &err);
 771        if (err) {
 772            error_report_err(err);
 773            goto undo;
 774        }
 775    }
 776
 777    if (msix_set_vector_notifiers(pdev,
 778                                  ivshmem_vector_unmask,
 779                                  ivshmem_vector_mask,
 780                                  ivshmem_vector_poll)) {
 781        error_report("ivshmem: msix_set_vector_notifiers failed");
 782        goto undo;
 783    }
 784    return;
 785
 786undo:
 787    while (--i >= 0) {
 788        ivshmem_remove_kvm_msi_virq(s, i);
 789    }
 790}
 791
 792static void ivshmem_disable_irqfd(IVShmemState *s)
 793{
 794    PCIDevice *pdev = PCI_DEVICE(s);
 795    int i;
 796
 797    if (!pdev->msix_vector_use_notifier) {
 798        return;
 799    }
 800
 801    msix_unset_vector_notifiers(pdev);
 802
 803    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
 804        /*
 805         * MSI-X is already disabled here so msix_unset_vector_notifiers()
 806         * didn't call our release notifier.  Do it now to keep our masks and
 807         * unmasks balanced.
 808         */
 809        if (s->msi_vectors[i].unmasked) {
 810            ivshmem_vector_mask(pdev, i);
 811        }
 812        ivshmem_remove_kvm_msi_virq(s, i);
 813    }
 814
 815}
 816
 817static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
 818                                 uint32_t val, int len)
 819{
 820    IVShmemState *s = IVSHMEM_COMMON(pdev);
 821    int is_enabled, was_enabled = msix_enabled(pdev);
 822
 823    pci_default_write_config(pdev, address, val, len);
 824    is_enabled = msix_enabled(pdev);
 825
 826    if (kvm_msi_via_irqfd_enabled()) {
 827        if (!was_enabled && is_enabled) {
 828            ivshmem_enable_irqfd(s);
 829        } else if (was_enabled && !is_enabled) {
 830            ivshmem_disable_irqfd(s);
 831        }
 832    }
 833}
 834
 835static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
 836{
 837    IVShmemState *s = IVSHMEM_COMMON(dev);
 838    Error *err = NULL;
 839    uint8_t *pci_conf;
 840
 841    /* IRQFD requires MSI */
 842    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
 843        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
 844        error_setg(errp, "ioeventfd/irqfd requires MSI");
 845        return;
 846    }
 847
 848    pci_conf = dev->config;
 849    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
 850
 851    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
 852                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);
 853
 854    /* region for registers*/
 855    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
 856                     &s->ivshmem_mmio);
 857
 858    if (s->hostmem != NULL) {
 859        IVSHMEM_DPRINTF("using hostmem\n");
 860
 861        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
 862        host_memory_backend_set_mapped(s->hostmem, true);
 863    } else {
 864        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
 865        assert(chr);
 866
 867        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
 868                        chr->filename);
 869
 870        /* we allocate enough space for 16 peers and grow as needed */
 871        resize_peers(s, 16);
 872
 873        /*
 874         * Receive setup messages from server synchronously.
 875         * Older versions did it asynchronously, but that creates a
 876         * number of entertaining race conditions.
 877         */
 878        ivshmem_recv_setup(s, &err);
 879        if (err) {
 880            error_propagate(errp, err);
 881            return;
 882        }
 883
 884        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
 885            error_setg(errp,
 886                       "master must connect to the server before any peers");
 887            return;
 888        }
 889
 890        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
 891                                 ivshmem_read, NULL, NULL, s, NULL, true);
 892
 893        if (ivshmem_setup_interrupts(s, errp) < 0) {
 894            error_prepend(errp, "Failed to initialize interrupts: ");
 895            return;
 896        }
 897    }
 898
 899    if (s->master == ON_OFF_AUTO_AUTO) {
 900        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
 901    }
 902
 903    if (!ivshmem_is_master(s)) {
 904        error_setg(&s->migration_blocker,
 905                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
 906        if (migrate_add_blocker(s->migration_blocker, errp) < 0) {
 907            error_free(s->migration_blocker);
 908            return;
 909        }
 910    }
 911
 912    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
 913    pci_register_bar(PCI_DEVICE(s), 2,
 914                     PCI_BASE_ADDRESS_SPACE_MEMORY |
 915                     PCI_BASE_ADDRESS_MEM_PREFETCH |
 916                     PCI_BASE_ADDRESS_MEM_TYPE_64,
 917                     s->ivshmem_bar2);
 918}
 919
 920static void ivshmem_exit(PCIDevice *dev)
 921{
 922    IVShmemState *s = IVSHMEM_COMMON(dev);
 923    int i;
 924
 925    if (s->migration_blocker) {
 926        migrate_del_blocker(s->migration_blocker);
 927        error_free(s->migration_blocker);
 928    }
 929
 930    if (memory_region_is_mapped(s->ivshmem_bar2)) {
 931        if (!s->hostmem) {
 932            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
 933            int fd;
 934
 935            if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
 936                error_report("Failed to munmap shared memory %s",
 937                             strerror(errno));
 938            }
 939
 940            fd = memory_region_get_fd(s->ivshmem_bar2);
 941            close(fd);
 942        }
 943
 944        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
 945    }
 946
 947    if (s->hostmem) {
 948        host_memory_backend_set_mapped(s->hostmem, false);
 949    }
 950
 951    if (s->peers) {
 952        for (i = 0; i < s->nb_peers; i++) {
 953            close_peer_eventfds(s, i);
 954        }
 955        g_free(s->peers);
 956    }
 957
 958    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 959        msix_uninit_exclusive_bar(dev);
 960    }
 961
 962    g_free(s->msi_vectors);
 963}
 964
 965static int ivshmem_pre_load(void *opaque)
 966{
 967    IVShmemState *s = opaque;
 968
 969    if (!ivshmem_is_master(s)) {
 970        error_report("'peer' devices are not migratable");
 971        return -EINVAL;
 972    }
 973
 974    return 0;
 975}
 976
 977static int ivshmem_post_load(void *opaque, int version_id)
 978{
 979    IVShmemState *s = opaque;
 980
 981    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
 982        ivshmem_msix_vector_use(s);
 983    }
 984    return 0;
 985}
 986
 987static void ivshmem_common_class_init(ObjectClass *klass, void *data)
 988{
 989    DeviceClass *dc = DEVICE_CLASS(klass);
 990    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 991
 992    k->realize = ivshmem_common_realize;
 993    k->exit = ivshmem_exit;
 994    k->config_write = ivshmem_write_config;
 995    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
 996    k->device_id = PCI_DEVICE_ID_IVSHMEM;
 997    k->class_id = PCI_CLASS_MEMORY_RAM;
 998    k->revision = 1;
 999    dc->reset = ivshmem_reset;
1000    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1001    dc->desc = "Inter-VM shared memory";
1002}
1003
1004static const TypeInfo ivshmem_common_info = {
1005    .name          = TYPE_IVSHMEM_COMMON,
1006    .parent        = TYPE_PCI_DEVICE,
1007    .instance_size = sizeof(IVShmemState),
1008    .abstract      = true,
1009    .class_init    = ivshmem_common_class_init,
1010    .interfaces = (InterfaceInfo[]) {
1011        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1012        { },
1013    },
1014};
1015
1016static const VMStateDescription ivshmem_plain_vmsd = {
1017    .name = TYPE_IVSHMEM_PLAIN,
1018    .version_id = 0,
1019    .minimum_version_id = 0,
1020    .pre_load = ivshmem_pre_load,
1021    .post_load = ivshmem_post_load,
1022    .fields = (VMStateField[]) {
1023        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1024        VMSTATE_UINT32(intrstatus, IVShmemState),
1025        VMSTATE_UINT32(intrmask, IVShmemState),
1026        VMSTATE_END_OF_LIST()
1027    },
1028};
1029
1030static Property ivshmem_plain_properties[] = {
1031    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1032    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
1033                     HostMemoryBackend *),
1034    DEFINE_PROP_END_OF_LIST(),
1035};
1036
1037static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
1038{
1039    IVShmemState *s = IVSHMEM_COMMON(dev);
1040
1041    if (!s->hostmem) {
1042        error_setg(errp, "You must specify a 'memdev'");
1043        return;
1044    } else if (host_memory_backend_is_mapped(s->hostmem)) {
1045        error_setg(errp, "can't use already busy memdev: %s",
1046                   object_get_canonical_path_component(OBJECT(s->hostmem)));
1047        return;
1048    }
1049
1050    ivshmem_common_realize(dev, errp);
1051}
1052
1053static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
1054{
1055    DeviceClass *dc = DEVICE_CLASS(klass);
1056    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1057
1058    k->realize = ivshmem_plain_realize;
1059    device_class_set_props(dc, ivshmem_plain_properties);
1060    dc->vmsd = &ivshmem_plain_vmsd;
1061}
1062
1063static const TypeInfo ivshmem_plain_info = {
1064    .name          = TYPE_IVSHMEM_PLAIN,
1065    .parent        = TYPE_IVSHMEM_COMMON,
1066    .instance_size = sizeof(IVShmemState),
1067    .class_init    = ivshmem_plain_class_init,
1068};
1069
1070static const VMStateDescription ivshmem_doorbell_vmsd = {
1071    .name = TYPE_IVSHMEM_DOORBELL,
1072    .version_id = 0,
1073    .minimum_version_id = 0,
1074    .pre_load = ivshmem_pre_load,
1075    .post_load = ivshmem_post_load,
1076    .fields = (VMStateField[]) {
1077        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
1078        VMSTATE_MSIX(parent_obj, IVShmemState),
1079        VMSTATE_UINT32(intrstatus, IVShmemState),
1080        VMSTATE_UINT32(intrmask, IVShmemState),
1081        VMSTATE_END_OF_LIST()
1082    },
1083};
1084
1085static Property ivshmem_doorbell_properties[] = {
1086    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
1087    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
1088    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
1089                    true),
1090    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
1091    DEFINE_PROP_END_OF_LIST(),
1092};
1093
1094static void ivshmem_doorbell_init(Object *obj)
1095{
1096    IVShmemState *s = IVSHMEM_DOORBELL(obj);
1097
1098    s->features |= (1 << IVSHMEM_MSI);
1099}
1100
1101static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
1102{
1103    IVShmemState *s = IVSHMEM_COMMON(dev);
1104
1105    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
1106        error_setg(errp, "You must specify a 'chardev'");
1107        return;
1108    }
1109
1110    ivshmem_common_realize(dev, errp);
1111}
1112
1113static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
1114{
1115    DeviceClass *dc = DEVICE_CLASS(klass);
1116    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1117
1118    k->realize = ivshmem_doorbell_realize;
1119    device_class_set_props(dc, ivshmem_doorbell_properties);
1120    dc->vmsd = &ivshmem_doorbell_vmsd;
1121}
1122
1123static const TypeInfo ivshmem_doorbell_info = {
1124    .name          = TYPE_IVSHMEM_DOORBELL,
1125    .parent        = TYPE_IVSHMEM_COMMON,
1126    .instance_size = sizeof(IVShmemState),
1127    .instance_init = ivshmem_doorbell_init,
1128    .class_init    = ivshmem_doorbell_class_init,
1129};
1130
1131static void ivshmem_register_types(void)
1132{
1133    type_register_static(&ivshmem_common_info);
1134    type_register_static(&ivshmem_plain_info);
1135    type_register_static(&ivshmem_doorbell_info);
1136}
1137
1138type_init(ivshmem_register_types)
1139