qemu/hw/hyperv/vmbus.c
<<
>>
Prefs
   1/*
   2 * QEMU Hyper-V VMBus
   3 *
   4 * Copyright (c) 2017-2018 Virtuozzo International GmbH.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/error-report.h"
  12#include "qemu/main-loop.h"
  13#include "qapi/error.h"
  14#include "migration/vmstate.h"
  15#include "hw/qdev-properties.h"
  16#include "hw/qdev-properties-system.h"
  17#include "hw/hyperv/hyperv.h"
  18#include "hw/hyperv/vmbus.h"
  19#include "hw/hyperv/vmbus-bridge.h"
  20#include "hw/sysbus.h"
  21#include "cpu.h"
  22#include "trace.h"
  23
  24enum {
  25    VMGPADL_INIT,
  26    VMGPADL_ALIVE,
  27    VMGPADL_TEARINGDOWN,
  28    VMGPADL_TORNDOWN,
  29};
  30
  31struct VMBusGpadl {
  32    /* GPADL id */
  33    uint32_t id;
  34    /* associated channel id (rudimentary?) */
  35    uint32_t child_relid;
  36
  37    /* number of pages in the GPADL as declared in GPADL_HEADER message */
  38    uint32_t num_gfns;
  39    /*
  40     * Due to limited message size, GPADL may not fit fully in a single
  41     * GPADL_HEADER message, and is further popluated using GPADL_BODY
  42     * messages.  @seen_gfns is the number of pages seen so far; once it
  43     * reaches @num_gfns, the GPADL is ready to use.
  44     */
  45    uint32_t seen_gfns;
  46    /* array of GFNs (of size @num_gfns once allocated) */
  47    uint64_t *gfns;
  48
  49    uint8_t state;
  50
  51    QTAILQ_ENTRY(VMBusGpadl) link;
  52    VMBus *vmbus;
  53    unsigned refcount;
  54};
  55
  56/*
  57 * Wrap sequential read from / write to GPADL.
  58 */
  59typedef struct GpadlIter {
  60    VMBusGpadl *gpadl;
  61    AddressSpace *as;
  62    DMADirection dir;
  63    /* offset into GPADL where the next i/o will be performed */
  64    uint32_t off;
  65    /*
  66     * Cached mapping of the currently accessed page, up to page boundary.
  67     * Updated lazily on i/o.
  68     * Note: MemoryRegionCache can not be used here because pages in the GPADL
  69     * are non-contiguous and may belong to different memory regions.
  70     */
  71    void *map;
  72    /* offset after last i/o (i.e. not affected by seek) */
  73    uint32_t last_off;
  74    /*
  75     * Indicator that the iterator is active and may have a cached mapping.
  76     * Allows to enforce bracketing of all i/o (which may create cached
  77     * mappings) and thus exclude mapping leaks.
  78     */
  79    bool active;
  80} GpadlIter;
  81
  82/*
  83 * Ring buffer.  There are two of them, sitting in the same GPADL, for each
  84 * channel.
  85 * Each ring buffer consists of a set of pages, with the first page containing
  86 * the ring buffer header, and the remaining pages being for data packets.
  87 */
  88typedef struct VMBusRingBufCommon {
  89    AddressSpace *as;
  90    /* GPA of the ring buffer header */
  91    dma_addr_t rb_addr;
  92    /* start and length of the ring buffer data area within GPADL */
  93    uint32_t base;
  94    uint32_t len;
  95
  96    GpadlIter iter;
  97} VMBusRingBufCommon;
  98
  99typedef struct VMBusSendRingBuf {
 100    VMBusRingBufCommon common;
 101    /* current write index, to be committed at the end of send */
 102    uint32_t wr_idx;
 103    /* write index at the start of send */
 104    uint32_t last_wr_idx;
 105    /* space to be requested from the guest */
 106    uint32_t wanted;
 107    /* space reserved for planned sends */
 108    uint32_t reserved;
 109    /* last seen read index */
 110    uint32_t last_seen_rd_idx;
 111} VMBusSendRingBuf;
 112
 113typedef struct VMBusRecvRingBuf {
 114    VMBusRingBufCommon common;
 115    /* current read index, to be committed at the end of receive */
 116    uint32_t rd_idx;
 117    /* read index at the start of receive */
 118    uint32_t last_rd_idx;
 119    /* last seen write index */
 120    uint32_t last_seen_wr_idx;
 121} VMBusRecvRingBuf;
 122
 123
 124enum {
 125    VMOFFER_INIT,
 126    VMOFFER_SENDING,
 127    VMOFFER_SENT,
 128};
 129
 130enum {
 131    VMCHAN_INIT,
 132    VMCHAN_OPENING,
 133    VMCHAN_OPEN,
 134};
 135
 136struct VMBusChannel {
 137    VMBusDevice *dev;
 138
 139    /* channel id */
 140    uint32_t id;
 141    /*
 142     * subchannel index within the device; subchannel #0 is "primary" and
 143     * always exists
 144     */
 145    uint16_t subchan_idx;
 146    uint32_t open_id;
 147    /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
 148    uint32_t target_vp;
 149    /* GPADL id to use for the ring buffers */
 150    uint32_t ringbuf_gpadl;
 151    /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
 152    uint32_t ringbuf_send_offset;
 153
 154    uint8_t offer_state;
 155    uint8_t state;
 156    bool is_open;
 157
 158    /* main device worker; copied from the device class */
 159    VMBusChannelNotifyCb notify_cb;
 160    /*
 161     * guest->host notifications, either sent directly or dispatched via
 162     * interrupt page (older VMBus)
 163     */
 164    EventNotifier notifier;
 165
 166    VMBus *vmbus;
 167    /*
 168     * SINT route to signal with host->guest notifications; may be shared with
 169     * the main VMBus SINT route
 170     */
 171    HvSintRoute *notify_route;
 172    VMBusGpadl *gpadl;
 173
 174    VMBusSendRingBuf send_ringbuf;
 175    VMBusRecvRingBuf recv_ringbuf;
 176
 177    QTAILQ_ENTRY(VMBusChannel) link;
 178};
 179
 180/*
 181 * Hyper-V spec mandates that every message port has 16 buffers, which means
 182 * that the guest can post up to this many messages without blocking.
 183 * Therefore a queue for incoming messages has to be provided.
 184 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
 185 * doesn't transition to a new state until the message is known to have been
 186 * successfully delivered to the respective SynIC message slot.
 187 */
 188#define HV_MSG_QUEUE_LEN     16
 189
 190/* Hyper-V devices never use channel #0.  Must be something special. */
 191#define VMBUS_FIRST_CHANID      1
 192/* Each channel occupies one bit within a single event page sint slot. */
 193#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
 194/* Leave a few connection numbers for other purposes. */
 195#define VMBUS_CHAN_CONNECTION_OFFSET     16
 196
 197/*
 198 * Since the success or failure of sending a message is reported
 199 * asynchronously, the VMBus state machine has effectively two entry points:
 200 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
 201 * message delivery status becomes known).  Both are run as oneshot BHs on the
 202 * main aio context, ensuring serialization.
 203 */
 204enum {
 205    VMBUS_LISTEN,
 206    VMBUS_HANDSHAKE,
 207    VMBUS_OFFER,
 208    VMBUS_CREATE_GPADL,
 209    VMBUS_TEARDOWN_GPADL,
 210    VMBUS_OPEN_CHANNEL,
 211    VMBUS_UNLOAD,
 212    VMBUS_STATE_MAX
 213};
 214
 215struct VMBus {
 216    BusState parent;
 217
 218    uint8_t state;
 219    /* protection against recursive aio_poll (see vmbus_run) */
 220    bool in_progress;
 221    /* whether there's a message being delivered to the guest */
 222    bool msg_in_progress;
 223    uint32_t version;
 224    /* VP_INDEX of the vCPU to send messages and interrupts to */
 225    uint32_t target_vp;
 226    HvSintRoute *sint_route;
 227    /*
 228     * interrupt page for older protocol versions; newer ones use SynIC event
 229     * flags directly
 230     */
 231    hwaddr int_page_gpa;
 232
 233    DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
 234
 235    /* incoming message queue */
 236    struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
 237    uint8_t rx_queue_head;
 238    uint8_t rx_queue_size;
 239    QemuMutex rx_queue_lock;
 240
 241    QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
 242    QTAILQ_HEAD(, VMBusChannel) channel_list;
 243
 244    /*
 245     * guest->host notifications for older VMBus, to be dispatched via
 246     * interrupt page
 247     */
 248    EventNotifier notifier;
 249};
 250
 251static bool gpadl_full(VMBusGpadl *gpadl)
 252{
 253    return gpadl->seen_gfns == gpadl->num_gfns;
 254}
 255
 256static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
 257                                uint32_t child_relid, uint32_t num_gfns)
 258{
 259    VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
 260
 261    gpadl->id = id;
 262    gpadl->child_relid = child_relid;
 263    gpadl->num_gfns = num_gfns;
 264    gpadl->gfns = g_new(uint64_t, num_gfns);
 265    QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
 266    gpadl->vmbus = vmbus;
 267    gpadl->refcount = 1;
 268    return gpadl;
 269}
 270
 271static void free_gpadl(VMBusGpadl *gpadl)
 272{
 273    QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
 274    g_free(gpadl->gfns);
 275    g_free(gpadl);
 276}
 277
 278static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
 279{
 280    VMBusGpadl *gpadl;
 281    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 282        if (gpadl->id == gpadl_id) {
 283            return gpadl;
 284        }
 285    }
 286    return NULL;
 287}
 288
 289VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
 290{
 291    VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
 292    if (!gpadl || !gpadl_full(gpadl)) {
 293        return NULL;
 294    }
 295    gpadl->refcount++;
 296    return gpadl;
 297}
 298
 299void vmbus_put_gpadl(VMBusGpadl *gpadl)
 300{
 301    if (!gpadl) {
 302        return;
 303    }
 304    if (--gpadl->refcount) {
 305        return;
 306    }
 307    free_gpadl(gpadl);
 308}
 309
 310uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
 311{
 312    return gpadl->num_gfns * TARGET_PAGE_SIZE;
 313}
 314
 315static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
 316                            AddressSpace *as, DMADirection dir)
 317{
 318    iter->gpadl = gpadl;
 319    iter->as = as;
 320    iter->dir = dir;
 321    iter->active = false;
 322}
 323
 324static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
 325{
 326    uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
 327    uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
 328
 329    /* mapping is only done to do non-zero amount of i/o */
 330    assert(iter->last_off > 0);
 331    assert(map_start_in_page < io_end_in_page);
 332
 333    dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
 334                     iter->dir, io_end_in_page - map_start_in_page);
 335}
 336
 337/*
 338 * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
 339 * The direction of the copy is determined by @iter->dir.
 340 * The caller must ensure the operation overflows neither @buf nor the GPADL
 341 * (there's an assert for the latter).
 342 * Reuse the currently mapped page in the GPADL if possible.
 343 */
 344static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
 345{
 346    ssize_t ret = len;
 347
 348    assert(iter->active);
 349
 350    while (len) {
 351        uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
 352        uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
 353        uint32_t cplen = MIN(pgleft, len);
 354        void *p;
 355
 356        /* try to reuse the cached mapping */
 357        if (iter->map) {
 358            uint32_t map_start_in_page =
 359                (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
 360            uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
 361            uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
 362            if (off_base != mapped_base || off_in_page < map_start_in_page) {
 363                gpadl_iter_cache_unmap(iter);
 364                iter->map = NULL;
 365            }
 366        }
 367
 368        if (!iter->map) {
 369            dma_addr_t maddr;
 370            dma_addr_t mlen = pgleft;
 371            uint32_t idx = iter->off >> TARGET_PAGE_BITS;
 372            assert(idx < iter->gpadl->num_gfns);
 373
 374            maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
 375
 376            iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
 377            if (mlen != pgleft) {
 378                dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
 379                iter->map = NULL;
 380                return -EFAULT;
 381            }
 382        }
 383
 384        p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
 385                off_in_page);
 386        if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
 387            memcpy(p, buf, cplen);
 388        } else {
 389            memcpy(buf, p, cplen);
 390        }
 391
 392        buf += cplen;
 393        len -= cplen;
 394        iter->off += cplen;
 395        iter->last_off = iter->off;
 396    }
 397
 398    return ret;
 399}
 400
 401/*
 402 * Position the iterator @iter at new offset @new_off.
 403 * If this results in the cached mapping being unusable with the new offset,
 404 * unmap it.
 405 */
 406static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
 407{
 408    assert(iter->active);
 409    iter->off = new_off;
 410}
 411
 412/*
 413 * Start a series of i/o on the GPADL.
 414 * After this i/o and seek operations on @iter become legal.
 415 */
 416static inline void gpadl_iter_start_io(GpadlIter *iter)
 417{
 418    assert(!iter->active);
 419    /* mapping is cached lazily on i/o */
 420    iter->map = NULL;
 421    iter->active = true;
 422}
 423
 424/*
 425 * End the eariler started series of i/o on the GPADL and release the cached
 426 * mapping if any.
 427 */
 428static inline void gpadl_iter_end_io(GpadlIter *iter)
 429{
 430    assert(iter->active);
 431
 432    if (iter->map) {
 433        gpadl_iter_cache_unmap(iter);
 434    }
 435
 436    iter->active = false;
 437}
 438
 439static void vmbus_resched(VMBus *vmbus);
 440static void vmbus_msg_cb(void *data, int status);
 441
 442ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
 443                           const struct iovec *iov, size_t iov_cnt)
 444{
 445    GpadlIter iter;
 446    size_t i;
 447    ssize_t ret = 0;
 448
 449    gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
 450                    DMA_DIRECTION_FROM_DEVICE);
 451    gpadl_iter_start_io(&iter);
 452    gpadl_iter_seek(&iter, off);
 453    for (i = 0; i < iov_cnt; i++) {
 454        ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
 455        if (ret < 0) {
 456            goto out;
 457        }
 458    }
 459out:
 460    gpadl_iter_end_io(&iter);
 461    return ret;
 462}
 463
 464int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
 465                  unsigned iov_cnt, size_t len, size_t off)
 466{
 467    int ret_cnt = 0, ret;
 468    unsigned i;
 469    QEMUSGList *sgl = &req->sgl;
 470    ScatterGatherEntry *sg = sgl->sg;
 471
 472    for (i = 0; i < sgl->nsg; i++) {
 473        if (sg[i].len > off) {
 474            break;
 475        }
 476        off -= sg[i].len;
 477    }
 478    for (; len && i < sgl->nsg; i++) {
 479        dma_addr_t mlen = MIN(sg[i].len - off, len);
 480        dma_addr_t addr = sg[i].base + off;
 481        len -= mlen;
 482        off = 0;
 483
 484        for (; mlen; ret_cnt++) {
 485            dma_addr_t l = mlen;
 486            dma_addr_t a = addr;
 487
 488            if (ret_cnt == iov_cnt) {
 489                ret = -ENOBUFS;
 490                goto err;
 491            }
 492
 493            iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
 494            if (!l) {
 495                ret = -EFAULT;
 496                goto err;
 497            }
 498            iov[ret_cnt].iov_len = l;
 499            addr += l;
 500            mlen -= l;
 501        }
 502    }
 503
 504    return ret_cnt;
 505err:
 506    vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
 507    return ret;
 508}
 509
 510void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
 511                     unsigned iov_cnt, size_t accessed)
 512{
 513    QEMUSGList *sgl = &req->sgl;
 514    unsigned i;
 515
 516    for (i = 0; i < iov_cnt; i++) {
 517        size_t acsd = MIN(accessed, iov[i].iov_len);
 518        dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
 519        accessed -= acsd;
 520    }
 521}
 522
 523static const VMStateDescription vmstate_gpadl = {
 524    .name = "vmbus/gpadl",
 525    .version_id = 0,
 526    .minimum_version_id = 0,
 527    .fields = (VMStateField[]) {
 528        VMSTATE_UINT32(id, VMBusGpadl),
 529        VMSTATE_UINT32(child_relid, VMBusGpadl),
 530        VMSTATE_UINT32(num_gfns, VMBusGpadl),
 531        VMSTATE_UINT32(seen_gfns, VMBusGpadl),
 532        VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
 533                                    vmstate_info_uint64, uint64_t),
 534        VMSTATE_UINT8(state, VMBusGpadl),
 535        VMSTATE_END_OF_LIST()
 536    }
 537};
 538
 539/*
 540 * Wrap the index into a ring buffer of @len bytes.
 541 * @idx is assumed not to exceed twice the size of the ringbuffer, so only
 542 * single wraparound is considered.
 543 */
 544static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
 545{
 546    if (idx >= len) {
 547        idx -= len;
 548    }
 549    return idx;
 550}
 551
 552/*
 553 * Circular difference between two indices into a ring buffer of @len bytes.
 554 * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch
 555 * up write index but not vice versa.
 556 */
 557static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
 558                                    bool allow_catchup)
 559{
 560    return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
 561}
 562
 563static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
 564{
 565    vmbus_ring_buffer *rb;
 566    dma_addr_t mlen = sizeof(*rb);
 567
 568    rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
 569                        DMA_DIRECTION_FROM_DEVICE);
 570    if (mlen != sizeof(*rb)) {
 571        dma_memory_unmap(ringbuf->as, rb, mlen,
 572                         DMA_DIRECTION_FROM_DEVICE, 0);
 573        return NULL;
 574    }
 575    return rb;
 576}
 577
 578static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
 579                              vmbus_ring_buffer *rb, bool dirty)
 580{
 581    assert(rb);
 582
 583    dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
 584                     dirty ? sizeof(*rb) : 0);
 585}
 586
 587static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
 588                                AddressSpace *as, DMADirection dir,
 589                                uint32_t begin, uint32_t end)
 590{
 591    ringbuf->as = as;
 592    ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
 593    ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
 594    ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
 595    gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
 596}
 597
 598static int ringbufs_init(VMBusChannel *chan)
 599{
 600    vmbus_ring_buffer *rb;
 601    VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
 602    VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
 603
 604    if (chan->ringbuf_send_offset <= 1 ||
 605        chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
 606        return -EINVAL;
 607    }
 608
 609    ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
 610                        DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
 611    ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
 612                        DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
 613                        chan->gpadl->num_gfns);
 614    send_ringbuf->wanted = 0;
 615    send_ringbuf->reserved = 0;
 616
 617    rb = ringbuf_map_hdr(&recv_ringbuf->common);
 618    if (!rb) {
 619        return -EFAULT;
 620    }
 621    recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
 622    ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
 623
 624    rb = ringbuf_map_hdr(&send_ringbuf->common);
 625    if (!rb) {
 626        return -EFAULT;
 627    }
 628    send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
 629    send_ringbuf->last_seen_rd_idx = rb->read_index;
 630    rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
 631    ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
 632
 633    if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
 634        send_ringbuf->wr_idx >= send_ringbuf->common.len) {
 635        return -EOVERFLOW;
 636    }
 637
 638    return 0;
 639}
 640
 641/*
 642 * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
 643 * around if needed.
 644 * @len is assumed not to exceed the size of the ringbuffer, so only single
 645 * wraparound is considered.
 646 */
 647static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
 648{
 649    ssize_t ret1 = 0, ret2 = 0;
 650    uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
 651
 652    if (len >= remain) {
 653        ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
 654        if (ret1 < 0) {
 655            return ret1;
 656        }
 657        gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
 658        buf += remain;
 659        len -= remain;
 660    }
 661    ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
 662    if (ret2 < 0) {
 663        return ret2;
 664    }
 665    return ret1 + ret2;
 666}
 667
 668/*
 669 * Position the circular iterator within @ringbuf to offset @new_off, wrapping
 670 * around if needed.
 671 * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
 672 * single wraparound is considered.
 673 */
 674static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
 675{
 676    gpadl_iter_seek(&ringbuf->iter,
 677                    ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
 678}
 679
 680static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
 681{
 682    return ringbuf->iter.off - ringbuf->base;
 683}
 684
 685static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
 686{
 687    gpadl_iter_start_io(&ringbuf->iter);
 688}
 689
 690static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
 691{
 692    gpadl_iter_end_io(&ringbuf->iter);
 693}
 694
 695VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
 696{
 697    return chan->dev;
 698}
 699
 700VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
 701{
 702    if (chan_idx >= dev->num_channels) {
 703        return NULL;
 704    }
 705    return &dev->channels[chan_idx];
 706}
 707
 708uint32_t vmbus_channel_idx(VMBusChannel *chan)
 709{
 710    return chan - chan->dev->channels;
 711}
 712
 713void vmbus_channel_notify_host(VMBusChannel *chan)
 714{
 715    event_notifier_set(&chan->notifier);
 716}
 717
 718bool vmbus_channel_is_open(VMBusChannel *chan)
 719{
 720    return chan->is_open;
 721}
 722
 723/*
 724 * Notify the guest side about the data to work on in the channel ring buffer.
 725 * The notification is done by signaling a dedicated per-channel SynIC event
 726 * flag (more recent guests) or setting a bit in the interrupt page and firing
 727 * the VMBus SINT (older guests).
 728 */
 729static int vmbus_channel_notify_guest(VMBusChannel *chan)
 730{
 731    int res = 0;
 732    unsigned long *int_map, mask;
 733    unsigned idx;
 734    hwaddr addr = chan->vmbus->int_page_gpa;
 735    hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
 736
 737    trace_vmbus_channel_notify_guest(chan->id);
 738
 739    if (!addr) {
 740        return hyperv_set_event_flag(chan->notify_route, chan->id);
 741    }
 742
 743    int_map = cpu_physical_memory_map(addr, &len, 1);
 744    if (len != TARGET_PAGE_SIZE / 2) {
 745        res = -ENXIO;
 746        goto unmap;
 747    }
 748
 749    idx = BIT_WORD(chan->id);
 750    mask = BIT_MASK(chan->id);
 751    if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
 752        res = hyperv_sint_route_set_sint(chan->notify_route);
 753        dirty = len;
 754    }
 755
 756unmap:
 757    cpu_physical_memory_unmap(int_map, len, 1, dirty);
 758    return res;
 759}
 760
 761#define VMBUS_PKT_TRAILER      sizeof(uint64_t)
 762
 763static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
 764                                          uint32_t desclen, uint32_t msglen)
 765{
 766    hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
 767        DIV_ROUND_UP(desclen, sizeof(uint64_t));
 768    hdr->len_qwords = hdr->offset_qwords +
 769        DIV_ROUND_UP(msglen, sizeof(uint64_t));
 770    return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
 771}
 772
 773/*
 774 * Simplified ring buffer operation with paired barriers annotations in the
 775 * producer and consumer loops:
 776 *
 777 * producer                           * consumer
 778 * ~~~~~~~~                           * ~~~~~~~~
 779 * write pending_send_sz              * read write_index
 780 * smp_mb                       [A]   * smp_mb                       [C]
 781 * read read_index                    * read packet
 782 * smp_mb                       [B]   * read/write out-of-band data
 783 * read/write out-of-band data        * smp_mb                       [B]
 784 * write packet                       * write read_index
 785 * smp_mb                       [C]   * smp_mb                       [A]
 786 * write write_index                  * read pending_send_sz
 787 * smp_wmb                      [D]   * smp_rmb                      [D]
 788 * write pending_send_sz              * read write_index
 789 * ...                                * ...
 790 */
 791
 792static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
 793{
 794    /* don't trust guest data */
 795    if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
 796        return 0;
 797    }
 798    return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
 799                        ringbuf->common.len, false);
 800}
 801
 802static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
 803{
 804    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 805    vmbus_ring_buffer *rb;
 806    uint32_t written;
 807
 808    written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
 809                           ringbuf->common.len, true);
 810    if (!written) {
 811        return 0;
 812    }
 813
 814    rb = ringbuf_map_hdr(&ringbuf->common);
 815    if (!rb) {
 816        return -EFAULT;
 817    }
 818
 819    ringbuf->reserved -= written;
 820
 821    /* prevent reorder with the data operation and packet write */
 822    smp_mb();                   /* barrier pair [C] */
 823    rb->write_index = ringbuf->wr_idx;
 824
 825    /*
 826     * If the producer earlier indicated that it wants to be notified when the
 827     * consumer frees certain amount of space in the ring buffer, that amount
 828     * is reduced by the size of the completed write.
 829     */
 830    if (ringbuf->wanted) {
 831        /* otherwise reservation would fail */
 832        assert(ringbuf->wanted < written);
 833        ringbuf->wanted -= written;
 834        /* prevent reorder with write_index write */
 835        smp_wmb();              /* barrier pair [D] */
 836        rb->pending_send_sz = ringbuf->wanted;
 837    }
 838
 839    /* prevent reorder with write_index or pending_send_sz write */
 840    smp_mb();                   /* barrier pair [A] */
 841    ringbuf->last_seen_rd_idx = rb->read_index;
 842
 843    /*
 844     * The consumer may have missed the reduction of pending_send_sz and skip
 845     * notification, so re-check the blocking condition, and, if it's no longer
 846     * true, ensure processing another iteration by simulating consumer's
 847     * notification.
 848     */
 849    if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
 850        vmbus_channel_notify_host(chan);
 851    }
 852
 853    /* skip notification by consumer's request */
 854    if (rb->interrupt_mask) {
 855        goto out;
 856    }
 857
 858    /*
 859     * The consumer hasn't caught up with the producer's previous state so it's
 860     * not blocked.
 861     * (last_seen_rd_idx comes from the guest but it's safe to use w/o
 862     * validation here as it only affects notification.)
 863     */
 864    if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
 865                     ringbuf->common.len, true) > written) {
 866        goto out;
 867    }
 868
 869    vmbus_channel_notify_guest(chan);
 870out:
 871    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
 872    ringbuf->last_wr_idx = ringbuf->wr_idx;
 873    return written;
 874}
 875
 876int vmbus_channel_reserve(VMBusChannel *chan,
 877                          uint32_t desclen, uint32_t msglen)
 878{
 879    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 880    vmbus_ring_buffer *rb = NULL;
 881    vmbus_packet_hdr hdr;
 882    uint32_t needed = ringbuf->reserved +
 883        vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
 884
 885    /* avoid touching the guest memory if possible */
 886    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
 887        goto success;
 888    }
 889
 890    rb = ringbuf_map_hdr(&ringbuf->common);
 891    if (!rb) {
 892        return -EFAULT;
 893    }
 894
 895    /* fetch read index from guest memory and try again */
 896    ringbuf->last_seen_rd_idx = rb->read_index;
 897
 898    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
 899        goto success;
 900    }
 901
 902    rb->pending_send_sz = needed;
 903
 904    /*
 905     * The consumer may have made progress and freed up some space before
 906     * seeing updated pending_send_sz, so re-read read_index (preventing
 907     * reorder with the pending_send_sz write) and try again.
 908     */
 909    smp_mb();                   /* barrier pair [A] */
 910    ringbuf->last_seen_rd_idx = rb->read_index;
 911
 912    if (needed > ringbuf_send_avail(ringbuf)) {
 913        goto out;
 914    }
 915
 916success:
 917    ringbuf->reserved = needed;
 918    needed = 0;
 919
 920    /* clear pending_send_sz if it was set */
 921    if (ringbuf->wanted) {
 922        if (!rb) {
 923            rb = ringbuf_map_hdr(&ringbuf->common);
 924            if (!rb) {
 925                /* failure to clear pending_send_sz is non-fatal */
 926                goto out;
 927            }
 928        }
 929
 930        rb->pending_send_sz = 0;
 931    }
 932
 933    /* prevent reorder of the following data operation with read_index read */
 934    smp_mb();                   /* barrier pair [B] */
 935
 936out:
 937    if (rb) {
 938        ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
 939    }
 940    ringbuf->wanted = needed;
 941    return needed ? -ENOSPC : 0;
 942}
 943
 944ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
 945                           void *desc, uint32_t desclen,
 946                           void *msg, uint32_t msglen,
 947                           bool need_comp, uint64_t transaction_id)
 948{
 949    ssize_t ret = 0;
 950    vmbus_packet_hdr hdr;
 951    uint32_t totlen;
 952    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 953
 954    if (!vmbus_channel_is_open(chan)) {
 955        return -EINVAL;
 956    }
 957
 958    totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
 959    hdr.type = pkt_type;
 960    hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
 961    hdr.transaction_id = transaction_id;
 962
 963    assert(totlen <= ringbuf->reserved);
 964
 965    ringbuf_start_io(&ringbuf->common);
 966    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
 967    ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
 968    if (ret < 0) {
 969        goto out;
 970    }
 971    if (desclen) {
 972        assert(desc);
 973        ret = ringbuf_io(&ringbuf->common, desc, desclen);
 974        if (ret < 0) {
 975            goto out;
 976        }
 977        ringbuf_seek(&ringbuf->common,
 978                     ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
 979    }
 980    ret = ringbuf_io(&ringbuf->common, msg, msglen);
 981    if (ret < 0) {
 982        goto out;
 983    }
 984    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
 985    ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
 986    ret = 0;
 987out:
 988    ringbuf_end_io(&ringbuf->common);
 989    if (ret) {
 990        return ret;
 991    }
 992    return ringbuf_send_update_idx(chan);
 993}
 994
 995ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
 996                                      void *msg, uint32_t msglen)
 997{
 998    assert(req->need_comp);
 999    return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
1000                              msg, msglen, false, req->transaction_id);
1001}
1002
1003static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1004                               VMBusRingBufCommon *ringbuf, uint32_t len)
1005{
1006    int ret;
1007    vmbus_pkt_gpa_direct hdr;
1008    hwaddr curaddr = 0;
1009    hwaddr curlen = 0;
1010    int num;
1011
1012    if (len < sizeof(hdr)) {
1013        return -EIO;
1014    }
1015    ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1016    if (ret < 0) {
1017        return ret;
1018    }
1019    len -= sizeof(hdr);
1020
1021    num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1022    if (num < 0) {
1023        return -EIO;
1024    }
1025    qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1026
1027    for (; hdr.rangecount; hdr.rangecount--) {
1028        vmbus_gpa_range range;
1029
1030        if (len < sizeof(range)) {
1031            goto eio;
1032        }
1033        ret = ringbuf_io(ringbuf, &range, sizeof(range));
1034        if (ret < 0) {
1035            goto err;
1036        }
1037        len -= sizeof(range);
1038
1039        if (range.byte_offset & TARGET_PAGE_MASK) {
1040            goto eio;
1041        }
1042
1043        for (; range.byte_count; range.byte_offset = 0) {
1044            uint64_t paddr;
1045            uint32_t plen = MIN(range.byte_count,
1046                                TARGET_PAGE_SIZE - range.byte_offset);
1047
1048            if (len < sizeof(uint64_t)) {
1049                goto eio;
1050            }
1051            ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1052            if (ret < 0) {
1053                goto err;
1054            }
1055            len -= sizeof(uint64_t);
1056            paddr <<= TARGET_PAGE_BITS;
1057            paddr |= range.byte_offset;
1058            range.byte_count -= plen;
1059
1060            if (curaddr + curlen == paddr) {
1061                /* consecutive fragments - join */
1062                curlen += plen;
1063            } else {
1064                if (curlen) {
1065                    qemu_sglist_add(sgl, curaddr, curlen);
1066                }
1067
1068                curaddr = paddr;
1069                curlen = plen;
1070            }
1071        }
1072    }
1073
1074    if (curlen) {
1075        qemu_sglist_add(sgl, curaddr, curlen);
1076    }
1077
1078    return 0;
1079eio:
1080    ret = -EIO;
1081err:
1082    qemu_sglist_destroy(sgl);
1083    return ret;
1084}
1085
1086static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1087                                     uint32_t size, uint16_t pkt_type,
1088                                     uint32_t msglen, uint64_t transaction_id,
1089                                     bool need_comp)
1090{
1091    VMBusChanReq *req;
1092    uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1093    uint32_t totlen = msgoff + msglen;
1094
1095    req = g_malloc0(totlen);
1096    req->chan = chan;
1097    req->pkt_type = pkt_type;
1098    req->msg = (void *)req + msgoff;
1099    req->msglen = msglen;
1100    req->transaction_id = transaction_id;
1101    req->need_comp = need_comp;
1102    return req;
1103}
1104
1105int vmbus_channel_recv_start(VMBusChannel *chan)
1106{
1107    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1108    vmbus_ring_buffer *rb;
1109
1110    rb = ringbuf_map_hdr(&ringbuf->common);
1111    if (!rb) {
1112        return -EFAULT;
1113    }
1114    ringbuf->last_seen_wr_idx = rb->write_index;
1115    ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1116
1117    if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1118        return -EOVERFLOW;
1119    }
1120
1121    /* prevent reorder of the following data operation with write_index read */
1122    smp_mb();                   /* barrier pair [C] */
1123    return 0;
1124}
1125
1126void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1127{
1128    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1129    vmbus_packet_hdr hdr = {};
1130    VMBusChanReq *req;
1131    uint32_t avail;
1132    uint32_t totlen, pktlen, msglen, msgoff, desclen;
1133
1134    assert(size >= sizeof(*req));
1135
1136    /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1137    avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1138                         ringbuf->common.len, true);
1139    if (avail < sizeof(hdr)) {
1140        return NULL;
1141    }
1142
1143    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1144    if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1145        return NULL;
1146    }
1147
1148    pktlen = hdr.len_qwords * sizeof(uint64_t);
1149    totlen = pktlen + VMBUS_PKT_TRAILER;
1150    if (totlen > avail) {
1151        return NULL;
1152    }
1153
1154    msgoff = hdr.offset_qwords * sizeof(uint64_t);
1155    if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1156        error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1157        return NULL;
1158    }
1159
1160    msglen = pktlen - msgoff;
1161
1162    req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1163                          hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1164
1165    switch (hdr.type) {
1166    case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1167        desclen = msgoff - sizeof(hdr);
1168        if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1169                                desclen) < 0) {
1170            error_report("%s: failed to convert GPA ranges to SGL", __func__);
1171            goto free_req;
1172        }
1173        break;
1174    case VMBUS_PACKET_DATA_INBAND:
1175    case VMBUS_PACKET_COMP:
1176        break;
1177    default:
1178        error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1179        goto free_req;
1180    }
1181
1182    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1183    if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1184        goto free_req;
1185    }
1186    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1187
1188    return req;
1189free_req:
1190    vmbus_free_req(req);
1191    return NULL;
1192}
1193
1194void vmbus_channel_recv_pop(VMBusChannel *chan)
1195{
1196    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1197    ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1198}
1199
1200ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1201{
1202    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1203    vmbus_ring_buffer *rb;
1204    uint32_t read;
1205
1206    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1207                        ringbuf->common.len, true);
1208    if (!read) {
1209        return 0;
1210    }
1211
1212    rb = ringbuf_map_hdr(&ringbuf->common);
1213    if (!rb) {
1214        return -EFAULT;
1215    }
1216
1217    /* prevent reorder with the data operation and packet read */
1218    smp_mb();                   /* barrier pair [B] */
1219    rb->read_index = ringbuf->rd_idx;
1220
1221    /* prevent reorder of the following pending_send_sz read */
1222    smp_mb();                   /* barrier pair [A] */
1223
1224    if (rb->interrupt_mask) {
1225        goto out;
1226    }
1227
1228    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1229        uint32_t wr_idx, wr_avail;
1230        uint32_t wanted = rb->pending_send_sz;
1231
1232        if (!wanted) {
1233            goto out;
1234        }
1235
1236        /* prevent reorder with pending_send_sz read */
1237        smp_rmb();              /* barrier pair [D] */
1238        wr_idx = rb->write_index;
1239
1240        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1241                                true);
1242
1243        /* the producer wasn't blocked on the consumer state */
1244        if (wr_avail >= read + wanted) {
1245            goto out;
1246        }
1247        /* there's not enough space for the producer to make progress */
1248        if (wr_avail < wanted) {
1249            goto out;
1250        }
1251    }
1252
1253    vmbus_channel_notify_guest(chan);
1254out:
1255    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1256    ringbuf->last_rd_idx = ringbuf->rd_idx;
1257    return read;
1258}
1259
1260void vmbus_free_req(void *req)
1261{
1262    VMBusChanReq *r = req;
1263
1264    if (!req) {
1265        return;
1266    }
1267
1268    if (r->sgl.dev) {
1269        qemu_sglist_destroy(&r->sgl);
1270    }
1271    g_free(req);
1272}
1273
1274static const VMStateDescription vmstate_sgent = {
1275    .name = "vmbus/sgentry",
1276    .version_id = 0,
1277    .minimum_version_id = 0,
1278    .fields = (VMStateField[]) {
1279        VMSTATE_UINT64(base, ScatterGatherEntry),
1280        VMSTATE_UINT64(len, ScatterGatherEntry),
1281        VMSTATE_END_OF_LIST()
1282    }
1283};
1284
1285typedef struct VMBusChanReqSave {
1286    uint16_t chan_idx;
1287    uint16_t pkt_type;
1288    uint32_t msglen;
1289    void *msg;
1290    uint64_t transaction_id;
1291    bool need_comp;
1292    uint32_t num;
1293    ScatterGatherEntry *sgl;
1294} VMBusChanReqSave;
1295
1296static const VMStateDescription vmstate_vmbus_chan_req = {
1297    .name = "vmbus/vmbus_chan_req",
1298    .version_id = 0,
1299    .minimum_version_id = 0,
1300    .fields = (VMStateField[]) {
1301        VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
1302        VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
1303        VMSTATE_UINT32(msglen, VMBusChanReqSave),
1304        VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
1305        VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
1306        VMSTATE_BOOL(need_comp, VMBusChanReqSave),
1307        VMSTATE_UINT32(num, VMBusChanReqSave),
1308        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
1309                                             vmstate_sgent, ScatterGatherEntry),
1310        VMSTATE_END_OF_LIST()
1311    }
1312};
1313
1314void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
1315{
1316    VMBusChanReqSave req_save;
1317
1318    req_save.chan_idx = req->chan->subchan_idx;
1319    req_save.pkt_type = req->pkt_type;
1320    req_save.msglen = req->msglen;
1321    req_save.msg = req->msg;
1322    req_save.transaction_id = req->transaction_id;
1323    req_save.need_comp = req->need_comp;
1324    req_save.num = req->sgl.nsg;
1325    req_save.sgl = g_memdup(req->sgl.sg,
1326                            req_save.num * sizeof(ScatterGatherEntry));
1327
1328    vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
1329
1330    g_free(req_save.sgl);
1331}
1332
1333void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
1334{
1335    VMBusChanReqSave req_save;
1336    VMBusChanReq *req = NULL;
1337    VMBusChannel *chan = NULL;
1338    uint32_t i;
1339
1340    vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
1341
1342    if (req_save.chan_idx >= dev->num_channels) {
1343        error_report("%s: %u(chan_idx) > %u(num_channels)", __func__,
1344                     req_save.chan_idx, dev->num_channels);
1345        goto out;
1346    }
1347    chan = &dev->channels[req_save.chan_idx];
1348
1349    if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
1350        goto out;
1351    }
1352
1353    req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
1354                          req_save.transaction_id, req_save.need_comp);
1355    if (req_save.msglen) {
1356        memcpy(req->msg, req_save.msg, req_save.msglen);
1357    }
1358
1359    for (i = 0; i < req_save.num; i++) {
1360        qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
1361    }
1362
1363out:
1364    if (req_save.msglen) {
1365        g_free(req_save.msg);
1366    }
1367    if (req_save.num) {
1368        g_free(req_save.sgl);
1369    }
1370    return req;
1371}
1372
1373static void channel_event_cb(EventNotifier *e)
1374{
1375    VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1376    if (event_notifier_test_and_clear(e)) {
1377        /*
1378         * All receives are supposed to happen within the device worker, so
1379         * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1380         * potentially reuse the cached mapping throughout the worker.
1381         * Can't do this for sends as they may happen outside the device
1382         * worker.
1383         */
1384        VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1385        ringbuf_start_io(&ringbuf->common);
1386        chan->notify_cb(chan);
1387        ringbuf_end_io(&ringbuf->common);
1388
1389    }
1390}
1391
1392static int alloc_chan_id(VMBus *vmbus)
1393{
1394    int ret;
1395
1396    ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1397    if (ret == VMBUS_CHANID_COUNT) {
1398        return -ENOMEM;
1399    }
1400    return ret + VMBUS_FIRST_CHANID;
1401}
1402
1403static int register_chan_id(VMBusChannel *chan)
1404{
1405    return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1406                            chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1407}
1408
1409static void unregister_chan_id(VMBusChannel *chan)
1410{
1411    clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1412}
1413
1414static uint32_t chan_connection_id(VMBusChannel *chan)
1415{
1416    return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1417}
1418
1419static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1420                         VMBusChannel *chan, uint16_t idx, Error **errp)
1421{
1422    int res;
1423
1424    chan->dev = dev;
1425    chan->notify_cb = vdc->chan_notify_cb;
1426    chan->subchan_idx = idx;
1427    chan->vmbus = vmbus;
1428
1429    res = alloc_chan_id(vmbus);
1430    if (res < 0) {
1431        error_setg(errp, "no spare channel id");
1432        return;
1433    }
1434    chan->id = res;
1435    register_chan_id(chan);
1436
1437    /*
1438     * The guest drivers depend on the device subchannels (idx #1+) to be
1439     * offered after the primary channel (idx #0) of that device.  To ensure
1440     * that, record the channels on the channel list in the order they appear
1441     * within the device.
1442     */
1443    QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1444}
1445
1446static void deinit_channel(VMBusChannel *chan)
1447{
1448    assert(chan->state == VMCHAN_INIT);
1449    QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1450    unregister_chan_id(chan);
1451}
1452
1453static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1454{
1455    uint16_t i;
1456    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1457    Error *err = NULL;
1458
1459    dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1460    if (dev->num_channels < 1) {
1461        error_setg(errp, "invalid #channels: %u", dev->num_channels);
1462        return;
1463    }
1464
1465    dev->channels = g_new0(VMBusChannel, dev->num_channels);
1466    for (i = 0; i < dev->num_channels; i++) {
1467        init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1468        if (err) {
1469            goto err_init;
1470        }
1471    }
1472
1473    return;
1474
1475err_init:
1476    while (i--) {
1477        deinit_channel(&dev->channels[i]);
1478    }
1479    error_propagate(errp, err);
1480}
1481
1482static void free_channels(VMBusDevice *dev)
1483{
1484    uint16_t i;
1485    for (i = 0; i < dev->num_channels; i++) {
1486        deinit_channel(&dev->channels[i]);
1487    }
1488    g_free(dev->channels);
1489}
1490
1491static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1492{
1493    VMBusChannel *chan;
1494
1495    if (vp_index == vmbus->target_vp) {
1496        hyperv_sint_route_ref(vmbus->sint_route);
1497        return vmbus->sint_route;
1498    }
1499
1500    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1501        if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1502            hyperv_sint_route_ref(chan->notify_route);
1503            return chan->notify_route;
1504        }
1505    }
1506
1507    return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1508}
1509
1510static void open_channel(VMBusChannel *chan)
1511{
1512    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1513
1514    chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1515    if (!chan->gpadl) {
1516        return;
1517    }
1518
1519    if (ringbufs_init(chan)) {
1520        goto put_gpadl;
1521    }
1522
1523    if (event_notifier_init(&chan->notifier, 0)) {
1524        goto put_gpadl;
1525    }
1526
1527    event_notifier_set_handler(&chan->notifier, channel_event_cb);
1528
1529    if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1530                                      &chan->notifier)) {
1531        goto cleanup_notifier;
1532    }
1533
1534    chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1535    if (!chan->notify_route) {
1536        goto clear_event_flag_handler;
1537    }
1538
1539    if (vdc->open_channel && vdc->open_channel(chan)) {
1540        goto unref_sint_route;
1541    }
1542
1543    chan->is_open = true;
1544    return;
1545
1546unref_sint_route:
1547    hyperv_sint_route_unref(chan->notify_route);
1548clear_event_flag_handler:
1549    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1550cleanup_notifier:
1551    event_notifier_set_handler(&chan->notifier, NULL);
1552    event_notifier_cleanup(&chan->notifier);
1553put_gpadl:
1554    vmbus_put_gpadl(chan->gpadl);
1555}
1556
1557static void close_channel(VMBusChannel *chan)
1558{
1559    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1560
1561    if (!chan->is_open) {
1562        return;
1563    }
1564
1565    if (vdc->close_channel) {
1566        vdc->close_channel(chan);
1567    }
1568
1569    hyperv_sint_route_unref(chan->notify_route);
1570    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1571    event_notifier_set_handler(&chan->notifier, NULL);
1572    event_notifier_cleanup(&chan->notifier);
1573    vmbus_put_gpadl(chan->gpadl);
1574    chan->is_open = false;
1575}
1576
1577static int channel_post_load(void *opaque, int version_id)
1578{
1579    VMBusChannel *chan = opaque;
1580
1581    return register_chan_id(chan);
1582}
1583
1584static const VMStateDescription vmstate_channel = {
1585    .name = "vmbus/channel",
1586    .version_id = 0,
1587    .minimum_version_id = 0,
1588    .post_load = channel_post_load,
1589    .fields = (VMStateField[]) {
1590        VMSTATE_UINT32(id, VMBusChannel),
1591        VMSTATE_UINT16(subchan_idx, VMBusChannel),
1592        VMSTATE_UINT32(open_id, VMBusChannel),
1593        VMSTATE_UINT32(target_vp, VMBusChannel),
1594        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
1595        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
1596        VMSTATE_UINT8(offer_state, VMBusChannel),
1597        VMSTATE_UINT8(state, VMBusChannel),
1598        VMSTATE_END_OF_LIST()
1599    }
1600};
1601
1602static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1603{
1604    VMBusChannel *chan;
1605    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1606        if (chan->id == id) {
1607            return chan;
1608        }
1609    }
1610    return NULL;
1611}
1612
1613static int enqueue_incoming_message(VMBus *vmbus,
1614                                    const struct hyperv_post_message_input *msg)
1615{
1616    int ret = 0;
1617    uint8_t idx, prev_size;
1618
1619    qemu_mutex_lock(&vmbus->rx_queue_lock);
1620
1621    if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1622        ret = -ENOBUFS;
1623        goto out;
1624    }
1625
1626    prev_size = vmbus->rx_queue_size;
1627    idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1628    memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1629    vmbus->rx_queue_size++;
1630
1631    /* only need to resched if the queue was empty before */
1632    if (!prev_size) {
1633        vmbus_resched(vmbus);
1634    }
1635out:
1636    qemu_mutex_unlock(&vmbus->rx_queue_lock);
1637    return ret;
1638}
1639
1640static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1641                                   void *data)
1642{
1643    VMBus *vmbus = data;
1644    struct vmbus_message_header *vmbus_msg;
1645
1646    if (msg->message_type != HV_MESSAGE_VMBUS) {
1647        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1648    }
1649
1650    if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1651        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1652    }
1653
1654    vmbus_msg = (struct vmbus_message_header *)msg->payload;
1655
1656    trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1657
1658    if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1659        vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1660        error_report("vmbus: unknown message type %#x",
1661                     vmbus_msg->message_type);
1662        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1663    }
1664
1665    if (enqueue_incoming_message(vmbus, msg)) {
1666        return HV_STATUS_INSUFFICIENT_BUFFERS;
1667    }
1668    return HV_STATUS_SUCCESS;
1669}
1670
1671static bool vmbus_initialized(VMBus *vmbus)
1672{
1673    return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1674}
1675
1676static void vmbus_reset_all(VMBus *vmbus)
1677{
1678    qbus_reset_all(BUS(vmbus));
1679}
1680
1681static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1682{
1683    int ret;
1684    struct hyperv_message msg = {
1685        .header.message_type = HV_MESSAGE_VMBUS,
1686    };
1687
1688    assert(!vmbus->msg_in_progress);
1689    assert(msglen <= sizeof(msg.payload));
1690    assert(msglen >= sizeof(struct vmbus_message_header));
1691
1692    vmbus->msg_in_progress = true;
1693
1694    trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1695                         msglen);
1696
1697    memcpy(msg.payload, msgdata, msglen);
1698    msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1699
1700    ret = hyperv_post_msg(vmbus->sint_route, &msg);
1701    if (ret == 0 || ret == -EAGAIN) {
1702        return;
1703    }
1704
1705    error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1706    vmbus_reset_all(vmbus);
1707}
1708
1709static int vmbus_init(VMBus *vmbus)
1710{
1711    if (vmbus->target_vp != (uint32_t)-1) {
1712        vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1713                                                  vmbus_msg_cb, vmbus);
1714        if (!vmbus->sint_route) {
1715            error_report("failed to set up SINT route");
1716            return -ENOMEM;
1717        }
1718    }
1719    return 0;
1720}
1721
1722static void vmbus_deinit(VMBus *vmbus)
1723{
1724    VMBusGpadl *gpadl, *tmp_gpadl;
1725    VMBusChannel *chan;
1726
1727    QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1728        if (gpadl->state == VMGPADL_TORNDOWN) {
1729            continue;
1730        }
1731        vmbus_put_gpadl(gpadl);
1732    }
1733
1734    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1735        chan->offer_state = VMOFFER_INIT;
1736    }
1737
1738    hyperv_sint_route_unref(vmbus->sint_route);
1739    vmbus->sint_route = NULL;
1740    vmbus->int_page_gpa = 0;
1741    vmbus->target_vp = (uint32_t)-1;
1742    vmbus->version = 0;
1743    vmbus->state = VMBUS_LISTEN;
1744    vmbus->msg_in_progress = false;
1745}
1746
1747static void handle_initiate_contact(VMBus *vmbus,
1748                                    vmbus_message_initiate_contact *msg,
1749                                    uint32_t msglen)
1750{
1751    if (msglen < sizeof(*msg)) {
1752        return;
1753    }
1754
1755    trace_vmbus_initiate_contact(msg->version_requested >> 16,
1756                                 msg->version_requested & 0xffff,
1757                                 msg->target_vcpu, msg->monitor_page1,
1758                                 msg->monitor_page2, msg->interrupt_page);
1759
1760    /*
1761     * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1762     * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
1763     * before handing over to OS loader.
1764     */
1765    vmbus_reset_all(vmbus);
1766
1767    vmbus->target_vp = msg->target_vcpu;
1768    vmbus->version = msg->version_requested;
1769    if (vmbus->version < VMBUS_VERSION_WIN8) {
1770        /* linux passes interrupt page even when it doesn't need it */
1771        vmbus->int_page_gpa = msg->interrupt_page;
1772    }
1773    vmbus->state = VMBUS_HANDSHAKE;
1774
1775    if (vmbus_init(vmbus)) {
1776        error_report("failed to init vmbus; aborting");
1777        vmbus_deinit(vmbus);
1778        return;
1779    }
1780}
1781
1782static void send_handshake(VMBus *vmbus)
1783{
1784    struct vmbus_message_version_response msg = {
1785        .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1786        .version_supported = vmbus_initialized(vmbus),
1787    };
1788
1789    post_msg(vmbus, &msg, sizeof(msg));
1790}
1791
1792static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1793{
1794    VMBusChannel *chan;
1795
1796    if (!vmbus_initialized(vmbus)) {
1797        return;
1798    }
1799
1800    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1801        if (chan->offer_state == VMOFFER_INIT) {
1802            chan->offer_state = VMOFFER_SENDING;
1803            break;
1804        }
1805    }
1806
1807    vmbus->state = VMBUS_OFFER;
1808}
1809
1810static void send_offer(VMBus *vmbus)
1811{
1812    VMBusChannel *chan;
1813    struct vmbus_message_header alloffers_msg = {
1814        .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1815    };
1816
1817    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1818        if (chan->offer_state == VMOFFER_SENDING) {
1819            VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1820            /* Hyper-V wants LE GUIDs */
1821            QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1822            QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1823            struct vmbus_message_offer_channel msg = {
1824                .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1825                .child_relid = chan->id,
1826                .connection_id = chan_connection_id(chan),
1827                .channel_flags = vdc->channel_flags,
1828                .mmio_size_mb = vdc->mmio_size_mb,
1829                .sub_channel_index = vmbus_channel_idx(chan),
1830                .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1831            };
1832
1833            memcpy(msg.type_uuid, &classid, sizeof(classid));
1834            memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1835
1836            trace_vmbus_send_offer(chan->id, chan->dev);
1837
1838            post_msg(vmbus, &msg, sizeof(msg));
1839            return;
1840        }
1841    }
1842
1843    /* no more offers, send terminator message */
1844    trace_vmbus_terminate_offers();
1845    post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1846}
1847
1848static bool complete_offer(VMBus *vmbus)
1849{
1850    VMBusChannel *chan;
1851
1852    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1853        if (chan->offer_state == VMOFFER_SENDING) {
1854            chan->offer_state = VMOFFER_SENT;
1855            goto next_offer;
1856        }
1857    }
1858    /*
1859     * no transitioning channels found so this is completing the terminator
1860     * message, and vmbus can move to the next state
1861     */
1862    return true;
1863
1864next_offer:
1865    /* try to mark another channel for offering */
1866    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1867        if (chan->offer_state == VMOFFER_INIT) {
1868            chan->offer_state = VMOFFER_SENDING;
1869            break;
1870        }
1871    }
1872    /*
1873     * if an offer has been sent there are more offers or the terminator yet to
1874     * send, so no state transition for vmbus
1875     */
1876    return false;
1877}
1878
1879
1880static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1881                                uint32_t msglen)
1882{
1883    VMBusGpadl *gpadl;
1884    uint32_t num_gfns, i;
1885
1886    /* must include at least one gpa range */
1887    if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1888        !vmbus_initialized(vmbus)) {
1889        return;
1890    }
1891
1892    num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1893               sizeof(msg->range[0].pfn_array[0]);
1894
1895    trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1896
1897    /*
1898     * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1899     * ranges each with arbitrary size and alignment.  However in practice only
1900     * single-range page-aligned GPADLs have been observed so just ignore
1901     * anything else and simplify things greatly.
1902     */
1903    if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1904        (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1905        return;
1906    }
1907
1908    /* ignore requests to create already existing GPADLs */
1909    if (find_gpadl(vmbus, msg->gpadl_id)) {
1910        return;
1911    }
1912
1913    gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1914
1915    for (i = 0; i < num_gfns &&
1916         (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1917         i++) {
1918        gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1919    }
1920
1921    if (gpadl_full(gpadl)) {
1922        vmbus->state = VMBUS_CREATE_GPADL;
1923    }
1924}
1925
1926static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1927                              uint32_t msglen)
1928{
1929    VMBusGpadl *gpadl;
1930    uint32_t num_gfns_left, i;
1931
1932    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1933        return;
1934    }
1935
1936    trace_vmbus_gpadl_body(msg->gpadl_id);
1937
1938    gpadl = find_gpadl(vmbus, msg->gpadl_id);
1939    if (!gpadl) {
1940        return;
1941    }
1942
1943    num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1944    assert(num_gfns_left);
1945
1946    for (i = 0; i < num_gfns_left &&
1947         (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1948        gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1949    }
1950
1951    if (gpadl_full(gpadl)) {
1952        vmbus->state = VMBUS_CREATE_GPADL;
1953    }
1954}
1955
1956static void send_create_gpadl(VMBus *vmbus)
1957{
1958    VMBusGpadl *gpadl;
1959
1960    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1961        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1962            struct vmbus_message_gpadl_created msg = {
1963                .header.message_type = VMBUS_MSG_GPADL_CREATED,
1964                .gpadl_id = gpadl->id,
1965                .child_relid = gpadl->child_relid,
1966            };
1967
1968            trace_vmbus_gpadl_created(gpadl->id);
1969            post_msg(vmbus, &msg, sizeof(msg));
1970            return;
1971        }
1972    }
1973
1974    assert(false);
1975}
1976
1977static bool complete_create_gpadl(VMBus *vmbus)
1978{
1979    VMBusGpadl *gpadl;
1980
1981    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1982        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1983            gpadl->state = VMGPADL_ALIVE;
1984
1985            return true;
1986        }
1987    }
1988
1989    assert(false);
1990    return false;
1991}
1992
1993static void handle_gpadl_teardown(VMBus *vmbus,
1994                                  vmbus_message_gpadl_teardown *msg,
1995                                  uint32_t msglen)
1996{
1997    VMBusGpadl *gpadl;
1998
1999    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2000        return;
2001    }
2002
2003    trace_vmbus_gpadl_teardown(msg->gpadl_id);
2004
2005    gpadl = find_gpadl(vmbus, msg->gpadl_id);
2006    if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
2007        return;
2008    }
2009
2010    gpadl->state = VMGPADL_TEARINGDOWN;
2011    vmbus->state = VMBUS_TEARDOWN_GPADL;
2012}
2013
2014static void send_teardown_gpadl(VMBus *vmbus)
2015{
2016    VMBusGpadl *gpadl;
2017
2018    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2019        if (gpadl->state == VMGPADL_TEARINGDOWN) {
2020            struct vmbus_message_gpadl_torndown msg = {
2021                .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
2022                .gpadl_id = gpadl->id,
2023            };
2024
2025            trace_vmbus_gpadl_torndown(gpadl->id);
2026            post_msg(vmbus, &msg, sizeof(msg));
2027            return;
2028        }
2029    }
2030
2031    assert(false);
2032}
2033
2034static bool complete_teardown_gpadl(VMBus *vmbus)
2035{
2036    VMBusGpadl *gpadl;
2037
2038    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2039        if (gpadl->state == VMGPADL_TEARINGDOWN) {
2040            gpadl->state = VMGPADL_TORNDOWN;
2041            vmbus_put_gpadl(gpadl);
2042            return true;
2043        }
2044    }
2045
2046    assert(false);
2047    return false;
2048}
2049
2050static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
2051                                uint32_t msglen)
2052{
2053    VMBusChannel *chan;
2054
2055    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2056        return;
2057    }
2058
2059    trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
2060                             msg->target_vp);
2061    chan = find_channel(vmbus, msg->child_relid);
2062    if (!chan || chan->state != VMCHAN_INIT) {
2063        return;
2064    }
2065
2066    chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
2067    chan->ringbuf_send_offset = msg->ring_buffer_offset;
2068    chan->target_vp = msg->target_vp;
2069    chan->open_id = msg->open_id;
2070
2071    open_channel(chan);
2072
2073    chan->state = VMCHAN_OPENING;
2074    vmbus->state = VMBUS_OPEN_CHANNEL;
2075}
2076
2077static void send_open_channel(VMBus *vmbus)
2078{
2079    VMBusChannel *chan;
2080
2081    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2082        if (chan->state == VMCHAN_OPENING) {
2083            struct vmbus_message_open_result msg = {
2084                .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
2085                .child_relid = chan->id,
2086                .open_id = chan->open_id,
2087                .status = !vmbus_channel_is_open(chan),
2088            };
2089
2090            trace_vmbus_channel_open(chan->id, msg.status);
2091            post_msg(vmbus, &msg, sizeof(msg));
2092            return;
2093        }
2094    }
2095
2096    assert(false);
2097}
2098
2099static bool complete_open_channel(VMBus *vmbus)
2100{
2101    VMBusChannel *chan;
2102
2103    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2104        if (chan->state == VMCHAN_OPENING) {
2105            if (vmbus_channel_is_open(chan)) {
2106                chan->state = VMCHAN_OPEN;
2107                /*
2108                 * simulate guest notification of ringbuffer space made
2109                 * available, for the channel protocols where the host
2110                 * initiates the communication
2111                 */
2112                vmbus_channel_notify_host(chan);
2113            } else {
2114                chan->state = VMCHAN_INIT;
2115            }
2116            return true;
2117        }
2118    }
2119
2120    assert(false);
2121    return false;
2122}
2123
2124static void vdev_reset_on_close(VMBusDevice *vdev)
2125{
2126    uint16_t i;
2127
2128    for (i = 0; i < vdev->num_channels; i++) {
2129        if (vmbus_channel_is_open(&vdev->channels[i])) {
2130            return;
2131        }
2132    }
2133
2134    /* all channels closed -- reset device */
2135    qdev_reset_all(DEVICE(vdev));
2136}
2137
2138static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2139                                 uint32_t msglen)
2140{
2141    VMBusChannel *chan;
2142
2143    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2144        return;
2145    }
2146
2147    trace_vmbus_close_channel(msg->child_relid);
2148
2149    chan = find_channel(vmbus, msg->child_relid);
2150    if (!chan) {
2151        return;
2152    }
2153
2154    close_channel(chan);
2155    chan->state = VMCHAN_INIT;
2156
2157    vdev_reset_on_close(chan->dev);
2158}
2159
2160static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2161{
2162    vmbus->state = VMBUS_UNLOAD;
2163}
2164
2165static void send_unload(VMBus *vmbus)
2166{
2167    vmbus_message_header msg = {
2168        .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2169    };
2170
2171    qemu_mutex_lock(&vmbus->rx_queue_lock);
2172    vmbus->rx_queue_size = 0;
2173    qemu_mutex_unlock(&vmbus->rx_queue_lock);
2174
2175    post_msg(vmbus, &msg, sizeof(msg));
2176    return;
2177}
2178
2179static bool complete_unload(VMBus *vmbus)
2180{
2181    vmbus_reset_all(vmbus);
2182    return true;
2183}
2184
2185static void process_message(VMBus *vmbus)
2186{
2187    struct hyperv_post_message_input *hv_msg;
2188    struct vmbus_message_header *msg;
2189    void *msgdata;
2190    uint32_t msglen;
2191
2192    qemu_mutex_lock(&vmbus->rx_queue_lock);
2193
2194    if (!vmbus->rx_queue_size) {
2195        goto unlock;
2196    }
2197
2198    hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2199    msglen =  hv_msg->payload_size;
2200    if (msglen < sizeof(*msg)) {
2201        goto out;
2202    }
2203    msgdata = hv_msg->payload;
2204    msg = (struct vmbus_message_header *)msgdata;
2205
2206    trace_vmbus_process_incoming_message(msg->message_type);
2207
2208    switch (msg->message_type) {
2209    case VMBUS_MSG_INITIATE_CONTACT:
2210        handle_initiate_contact(vmbus, msgdata, msglen);
2211        break;
2212    case VMBUS_MSG_REQUESTOFFERS:
2213        handle_request_offers(vmbus, msgdata, msglen);
2214        break;
2215    case VMBUS_MSG_GPADL_HEADER:
2216        handle_gpadl_header(vmbus, msgdata, msglen);
2217        break;
2218    case VMBUS_MSG_GPADL_BODY:
2219        handle_gpadl_body(vmbus, msgdata, msglen);
2220        break;
2221    case VMBUS_MSG_GPADL_TEARDOWN:
2222        handle_gpadl_teardown(vmbus, msgdata, msglen);
2223        break;
2224    case VMBUS_MSG_OPENCHANNEL:
2225        handle_open_channel(vmbus, msgdata, msglen);
2226        break;
2227    case VMBUS_MSG_CLOSECHANNEL:
2228        handle_close_channel(vmbus, msgdata, msglen);
2229        break;
2230    case VMBUS_MSG_UNLOAD:
2231        handle_unload(vmbus, msgdata, msglen);
2232        break;
2233    default:
2234        error_report("unknown message type %#x", msg->message_type);
2235        break;
2236    }
2237
2238out:
2239    vmbus->rx_queue_size--;
2240    vmbus->rx_queue_head++;
2241    vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2242
2243    vmbus_resched(vmbus);
2244unlock:
2245    qemu_mutex_unlock(&vmbus->rx_queue_lock);
2246}
2247
2248static const struct {
2249    void (*run)(VMBus *vmbus);
2250    bool (*complete)(VMBus *vmbus);
2251} state_runner[] = {
2252    [VMBUS_LISTEN]         = {process_message,     NULL},
2253    [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2254    [VMBUS_OFFER]          = {send_offer,          complete_offer},
2255    [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2256    [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2257    [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2258    [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2259};
2260
2261static void vmbus_do_run(VMBus *vmbus)
2262{
2263    if (vmbus->msg_in_progress) {
2264        return;
2265    }
2266
2267    assert(vmbus->state < VMBUS_STATE_MAX);
2268    assert(state_runner[vmbus->state].run);
2269    state_runner[vmbus->state].run(vmbus);
2270}
2271
2272static void vmbus_run(void *opaque)
2273{
2274    VMBus *vmbus = opaque;
2275
2276    /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2277    if (vmbus->in_progress) {
2278        return;
2279    }
2280
2281    vmbus->in_progress = true;
2282    /*
2283     * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2284     * should go *after* the code that can result in aio_poll; otherwise
2285     * reschedules can be missed.  No idea how to enforce that.
2286     */
2287    vmbus_do_run(vmbus);
2288    vmbus->in_progress = false;
2289}
2290
2291static void vmbus_msg_cb(void *data, int status)
2292{
2293    VMBus *vmbus = data;
2294    bool (*complete)(VMBus *vmbus);
2295
2296    assert(vmbus->msg_in_progress);
2297
2298    trace_vmbus_msg_cb(status);
2299
2300    if (status == -EAGAIN) {
2301        goto out;
2302    }
2303    if (status) {
2304        error_report("message delivery fatal failure: %d; aborting vmbus",
2305                     status);
2306        vmbus_reset_all(vmbus);
2307        return;
2308    }
2309
2310    assert(vmbus->state < VMBUS_STATE_MAX);
2311    complete = state_runner[vmbus->state].complete;
2312    if (!complete || complete(vmbus)) {
2313        vmbus->state = VMBUS_LISTEN;
2314    }
2315out:
2316    vmbus->msg_in_progress = false;
2317    vmbus_resched(vmbus);
2318}
2319
2320static void vmbus_resched(VMBus *vmbus)
2321{
2322    aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2323}
2324
2325static void vmbus_signal_event(EventNotifier *e)
2326{
2327    VMBusChannel *chan;
2328    VMBus *vmbus = container_of(e, VMBus, notifier);
2329    unsigned long *int_map;
2330    hwaddr addr, len;
2331    bool is_dirty = false;
2332
2333    if (!event_notifier_test_and_clear(e)) {
2334        return;
2335    }
2336
2337    trace_vmbus_signal_event();
2338
2339    if (!vmbus->int_page_gpa) {
2340        return;
2341    }
2342
2343    addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2344    len = TARGET_PAGE_SIZE / 2;
2345    int_map = cpu_physical_memory_map(addr, &len, 1);
2346    if (len != TARGET_PAGE_SIZE / 2) {
2347        goto unmap;
2348    }
2349
2350    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2351        if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2352            if (!vmbus_channel_is_open(chan)) {
2353                continue;
2354            }
2355            vmbus_channel_notify_host(chan);
2356            is_dirty = true;
2357        }
2358    }
2359
2360unmap:
2361    cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2362}
2363
2364static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2365{
2366    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2367    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2368    VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2369    BusChild *child;
2370    Error *err = NULL;
2371    char idstr[UUID_FMT_LEN + 1];
2372
2373    assert(!qemu_uuid_is_null(&vdev->instanceid));
2374
2375    if (!qemu_uuid_is_null(&vdc->instanceid)) {
2376        /* Class wants to only have a single instance with a fixed UUID */
2377        if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2378            error_setg(&err, "instance id can't be changed");
2379            goto error_out;
2380        }
2381    }
2382
2383    /* Check for instance id collision for this class id */
2384    QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2385        VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2386
2387        if (child_dev == vdev) {
2388            continue;
2389        }
2390
2391        if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2392            qemu_uuid_unparse(&vdev->instanceid, idstr);
2393            error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2394            goto error_out;
2395        }
2396    }
2397
2398    vdev->dma_as = &address_space_memory;
2399
2400    create_channels(vmbus, vdev, &err);
2401    if (err) {
2402        goto error_out;
2403    }
2404
2405    if (vdc->vmdev_realize) {
2406        vdc->vmdev_realize(vdev, &err);
2407        if (err) {
2408            goto err_vdc_realize;
2409        }
2410    }
2411    return;
2412
2413err_vdc_realize:
2414    free_channels(vdev);
2415error_out:
2416    error_propagate(errp, err);
2417}
2418
2419static void vmbus_dev_reset(DeviceState *dev)
2420{
2421    uint16_t i;
2422    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2423    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2424
2425    if (vdev->channels) {
2426        for (i = 0; i < vdev->num_channels; i++) {
2427            VMBusChannel *chan = &vdev->channels[i];
2428            close_channel(chan);
2429            chan->state = VMCHAN_INIT;
2430        }
2431    }
2432
2433    if (vdc->vmdev_reset) {
2434        vdc->vmdev_reset(vdev);
2435    }
2436}
2437
2438static void vmbus_dev_unrealize(DeviceState *dev)
2439{
2440    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2441    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2442
2443    if (vdc->vmdev_unrealize) {
2444        vdc->vmdev_unrealize(vdev);
2445    }
2446    free_channels(vdev);
2447}
2448
2449static Property vmbus_dev_props[] = {
2450    DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
2451    DEFINE_PROP_END_OF_LIST()
2452};
2453
2454
2455static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2456{
2457    DeviceClass *kdev = DEVICE_CLASS(klass);
2458    device_class_set_props(kdev, vmbus_dev_props);
2459    kdev->bus_type = TYPE_VMBUS;
2460    kdev->realize = vmbus_dev_realize;
2461    kdev->unrealize = vmbus_dev_unrealize;
2462    kdev->reset = vmbus_dev_reset;
2463}
2464
2465static void vmbus_dev_instance_init(Object *obj)
2466{
2467    VMBusDevice *vdev = VMBUS_DEVICE(obj);
2468    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2469
2470    if (!qemu_uuid_is_null(&vdc->instanceid)) {
2471        /* Class wants to only have a single instance with a fixed UUID */
2472        vdev->instanceid = vdc->instanceid;
2473    }
2474}
2475
2476const VMStateDescription vmstate_vmbus_dev = {
2477    .name = TYPE_VMBUS_DEVICE,
2478    .version_id = 0,
2479    .minimum_version_id = 0,
2480    .fields = (VMStateField[]) {
2481        VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2482        VMSTATE_UINT16(num_channels, VMBusDevice),
2483        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2484                                             num_channels, vmstate_channel,
2485                                             VMBusChannel),
2486        VMSTATE_END_OF_LIST()
2487    }
2488};
2489
2490/* vmbus generic device base */
2491static const TypeInfo vmbus_dev_type_info = {
2492    .name = TYPE_VMBUS_DEVICE,
2493    .parent = TYPE_DEVICE,
2494    .abstract = true,
2495    .instance_size = sizeof(VMBusDevice),
2496    .class_size = sizeof(VMBusDeviceClass),
2497    .class_init = vmbus_dev_class_init,
2498    .instance_init = vmbus_dev_instance_init,
2499};
2500
2501static void vmbus_realize(BusState *bus, Error **errp)
2502{
2503    int ret = 0;
2504    Error *local_err = NULL;
2505    VMBus *vmbus = VMBUS(bus);
2506
2507    qemu_mutex_init(&vmbus->rx_queue_lock);
2508
2509    QTAILQ_INIT(&vmbus->gpadl_list);
2510    QTAILQ_INIT(&vmbus->channel_list);
2511
2512    ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2513                                 vmbus_recv_message, vmbus);
2514    if (ret != 0) {
2515        error_setg(&local_err, "hyperv set message handler failed: %d", ret);
2516        goto error_out;
2517    }
2518
2519    ret = event_notifier_init(&vmbus->notifier, 0);
2520    if (ret != 0) {
2521        error_setg(&local_err, "event notifier failed to init with %d", ret);
2522        goto remove_msg_handler;
2523    }
2524
2525    event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2526    ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2527                                        &vmbus->notifier);
2528    if (ret != 0) {
2529        error_setg(&local_err, "hyperv set event handler failed with %d", ret);
2530        goto clear_event_notifier;
2531    }
2532
2533    return;
2534
2535clear_event_notifier:
2536    event_notifier_cleanup(&vmbus->notifier);
2537remove_msg_handler:
2538    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2539error_out:
2540    qemu_mutex_destroy(&vmbus->rx_queue_lock);
2541    error_propagate(errp, local_err);
2542}
2543
2544static void vmbus_unrealize(BusState *bus)
2545{
2546    VMBus *vmbus = VMBUS(bus);
2547
2548    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2549    hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2550    event_notifier_cleanup(&vmbus->notifier);
2551
2552    qemu_mutex_destroy(&vmbus->rx_queue_lock);
2553}
2554
2555static void vmbus_reset(BusState *bus)
2556{
2557    vmbus_deinit(VMBUS(bus));
2558}
2559
2560static char *vmbus_get_dev_path(DeviceState *dev)
2561{
2562    BusState *bus = qdev_get_parent_bus(dev);
2563    return qdev_get_dev_path(bus->parent);
2564}
2565
2566static char *vmbus_get_fw_dev_path(DeviceState *dev)
2567{
2568    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2569    char uuid[UUID_FMT_LEN + 1];
2570
2571    qemu_uuid_unparse(&vdev->instanceid, uuid);
2572    return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2573}
2574
2575static void vmbus_class_init(ObjectClass *klass, void *data)
2576{
2577    BusClass *k = BUS_CLASS(klass);
2578
2579    k->get_dev_path = vmbus_get_dev_path;
2580    k->get_fw_dev_path = vmbus_get_fw_dev_path;
2581    k->realize = vmbus_realize;
2582    k->unrealize = vmbus_unrealize;
2583    k->reset = vmbus_reset;
2584}
2585
2586static int vmbus_pre_load(void *opaque)
2587{
2588    VMBusChannel *chan;
2589    VMBus *vmbus = VMBUS(opaque);
2590
2591    /*
2592     * channel IDs allocated by the source will come in the migration stream
2593     * for each channel, so clean up the ones allocated at realize
2594     */
2595    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2596        unregister_chan_id(chan);
2597    }
2598
2599    return 0;
2600}
2601static int vmbus_post_load(void *opaque, int version_id)
2602{
2603    int ret;
2604    VMBus *vmbus = VMBUS(opaque);
2605    VMBusGpadl *gpadl;
2606    VMBusChannel *chan;
2607
2608    ret = vmbus_init(vmbus);
2609    if (ret) {
2610        return ret;
2611    }
2612
2613    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2614        gpadl->vmbus = vmbus;
2615        gpadl->refcount = 1;
2616    }
2617
2618    /*
2619     * reopening channels depends on initialized vmbus so it's done here
2620     * instead of channel_post_load()
2621     */
2622    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2623
2624        if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2625            open_channel(chan);
2626        }
2627
2628        if (chan->state != VMCHAN_OPEN) {
2629            continue;
2630        }
2631
2632        if (!vmbus_channel_is_open(chan)) {
2633            /* reopen failed, abort loading */
2634            return -1;
2635        }
2636
2637        /* resume processing on the guest side if it missed the notification */
2638        hyperv_sint_route_set_sint(chan->notify_route);
2639        /* ditto on the host side */
2640        vmbus_channel_notify_host(chan);
2641    }
2642
2643    vmbus_resched(vmbus);
2644    return 0;
2645}
2646
2647static const VMStateDescription vmstate_post_message_input = {
2648    .name = "vmbus/hyperv_post_message_input",
2649    .version_id = 0,
2650    .minimum_version_id = 0,
2651    .fields = (VMStateField[]) {
2652        /*
2653         * skip connection_id and message_type as they are validated before
2654         * queueing and ignored on dequeueing
2655         */
2656        VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2657        VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2658                            HV_MESSAGE_PAYLOAD_SIZE),
2659        VMSTATE_END_OF_LIST()
2660    }
2661};
2662
2663static bool vmbus_rx_queue_needed(void *opaque)
2664{
2665    VMBus *vmbus = VMBUS(opaque);
2666    return vmbus->rx_queue_size;
2667}
2668
2669static const VMStateDescription vmstate_rx_queue = {
2670    .name = "vmbus/rx_queue",
2671    .version_id = 0,
2672    .minimum_version_id = 0,
2673    .needed = vmbus_rx_queue_needed,
2674    .fields = (VMStateField[]) {
2675        VMSTATE_UINT8(rx_queue_head, VMBus),
2676        VMSTATE_UINT8(rx_queue_size, VMBus),
2677        VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2678                             HV_MSG_QUEUE_LEN, 0,
2679                             vmstate_post_message_input,
2680                             struct hyperv_post_message_input),
2681        VMSTATE_END_OF_LIST()
2682    }
2683};
2684
2685static const VMStateDescription vmstate_vmbus = {
2686    .name = TYPE_VMBUS,
2687    .version_id = 0,
2688    .minimum_version_id = 0,
2689    .pre_load = vmbus_pre_load,
2690    .post_load = vmbus_post_load,
2691    .fields = (VMStateField[]) {
2692        VMSTATE_UINT8(state, VMBus),
2693        VMSTATE_UINT32(version, VMBus),
2694        VMSTATE_UINT32(target_vp, VMBus),
2695        VMSTATE_UINT64(int_page_gpa, VMBus),
2696        VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2697                         vmstate_gpadl, VMBusGpadl, link),
2698        VMSTATE_END_OF_LIST()
2699    },
2700    .subsections = (const VMStateDescription * []) {
2701        &vmstate_rx_queue,
2702        NULL
2703    }
2704};
2705
2706static const TypeInfo vmbus_type_info = {
2707    .name = TYPE_VMBUS,
2708    .parent = TYPE_BUS,
2709    .instance_size = sizeof(VMBus),
2710    .class_init = vmbus_class_init,
2711};
2712
2713static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2714{
2715    VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2716
2717    /*
2718     * here there's at least one vmbus bridge that is being realized, so
2719     * vmbus_bridge_find can only return NULL if it's not unique
2720     */
2721    if (!vmbus_bridge_find()) {
2722        error_setg(errp, "there can be at most one %s in the system",
2723                   TYPE_VMBUS_BRIDGE);
2724        return;
2725    }
2726
2727    if (!hyperv_is_synic_enabled()) {
2728        error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2729        return;
2730    }
2731
2732    bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2733}
2734
2735static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2736{
2737    /* there can be only one VMBus */
2738    return g_strdup("0");
2739}
2740
2741static const VMStateDescription vmstate_vmbus_bridge = {
2742    .name = TYPE_VMBUS_BRIDGE,
2743    .version_id = 0,
2744    .minimum_version_id = 0,
2745    .fields = (VMStateField[]) {
2746        VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2747        VMSTATE_END_OF_LIST()
2748    },
2749};
2750
2751static Property vmbus_bridge_props[] = {
2752    DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2753    DEFINE_PROP_END_OF_LIST()
2754};
2755
2756static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2757{
2758    DeviceClass *k = DEVICE_CLASS(klass);
2759    SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2760
2761    k->realize = vmbus_bridge_realize;
2762    k->fw_name = "vmbus";
2763    sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2764    set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2765    k->vmsd = &vmstate_vmbus_bridge;
2766    device_class_set_props(k, vmbus_bridge_props);
2767    /* override SysBusDevice's default */
2768    k->user_creatable = true;
2769}
2770
2771static const TypeInfo vmbus_bridge_type_info = {
2772    .name = TYPE_VMBUS_BRIDGE,
2773    .parent = TYPE_SYS_BUS_DEVICE,
2774    .instance_size = sizeof(VMBusBridge),
2775    .class_init = vmbus_bridge_class_init,
2776};
2777
2778static void vmbus_register_types(void)
2779{
2780    type_register_static(&vmbus_bridge_type_info);
2781    type_register_static(&vmbus_dev_type_info);
2782    type_register_static(&vmbus_type_info);
2783}
2784
2785type_init(vmbus_register_types)
2786