qemu/hw/hyperv/vmbus.c
   1/*
   2 * QEMU Hyper-V VMBus
   3 *
   4 * Copyright (c) 2017-2018 Virtuozzo International GmbH.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 */
   9
  10#include "qemu/osdep.h"
  11#include "qemu/error-report.h"
  12#include "qemu/main-loop.h"
  13#include "qapi/error.h"
  14#include "migration/vmstate.h"
  15#include "hw/qdev-properties.h"
  16#include "hw/qdev-properties-system.h"
  17#include "hw/hyperv/hyperv.h"
  18#include "hw/hyperv/vmbus.h"
  19#include "hw/hyperv/vmbus-bridge.h"
  20#include "hw/sysbus.h"
  21#include "cpu.h"
  22#include "trace.h"
  23
  24enum {
  25    VMGPADL_INIT,
  26    VMGPADL_ALIVE,
  27    VMGPADL_TEARINGDOWN,
  28    VMGPADL_TORNDOWN,
  29};
  30
  31struct VMBusGpadl {
  32    /* GPADL id */
  33    uint32_t id;
  34    /* associated channel id (rudimentary?) */
  35    uint32_t child_relid;
  36
  37    /* number of pages in the GPADL as declared in GPADL_HEADER message */
  38    uint32_t num_gfns;
  39    /*
  40     * Due to limited message size, GPADL may not fit fully in a single
   41     * GPADL_HEADER message, and is further populated using GPADL_BODY
  42     * messages.  @seen_gfns is the number of pages seen so far; once it
  43     * reaches @num_gfns, the GPADL is ready to use.
  44     */
  45    uint32_t seen_gfns;
  46    /* array of GFNs (of size @num_gfns once allocated) */
  47    uint64_t *gfns;
  48
  49    uint8_t state;
  50
  51    QTAILQ_ENTRY(VMBusGpadl) link;
  52    VMBus *vmbus;
  53    unsigned refcount;
  54};
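
/*
 * Illustrative sketch (for exposition only) of how a GPADL is populated, using
 * the fields described above:
 *
 *     GPADL_HEADER { id, child_relid, num_gfns = N, first few GFNs }  -> seen_gfns = k
 *     GPADL_BODY   { id, more GFNs }                                  -> seen_gfns = m
 *     ...repeated until seen_gfns == num_gfns, i.e. gpadl_full() returns true
 *     and the GPADL is ready to use.
 */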
  55
  56/*
  57 * Wrap sequential read from / write to GPADL.
  58 */
  59typedef struct GpadlIter {
  60    VMBusGpadl *gpadl;
  61    AddressSpace *as;
  62    DMADirection dir;
  63    /* offset into GPADL where the next i/o will be performed */
  64    uint32_t off;
  65    /*
  66     * Cached mapping of the currently accessed page, up to page boundary.
  67     * Updated lazily on i/o.
  68     * Note: MemoryRegionCache can not be used here because pages in the GPADL
  69     * are non-contiguous and may belong to different memory regions.
  70     */
  71    void *map;
  72    /* offset after last i/o (i.e. not affected by seek) */
  73    uint32_t last_off;
  74    /*
  75     * Indicator that the iterator is active and may have a cached mapping.
   76     * Allows enforcing the bracketing of all i/o (which may create cached
   77     * mappings) and thus excluding mapping leaks.
  78     */
  79    bool active;
  80} GpadlIter;
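
/*
 * Illustrative usage sketch (hypothetical, for exposition only) of the
 * bracketing described above; @gpadl is a ready GPADL and @as the device's
 * DMA address space:
 *
 *     GpadlIter iter;
 *     uint8_t buf[64];
 *
 *     gpadl_iter_init(&iter, gpadl, as, DMA_DIRECTION_TO_DEVICE);
 *     gpadl_iter_start_io(&iter);                  // no cached mapping yet
 *     gpadl_iter_seek(&iter, 0);
 *     if (gpadl_iter_io(&iter, buf, sizeof(buf)) < 0) {
 *         // -EFAULT: a page failed to map
 *     }
 *     gpadl_iter_end_io(&iter);                    // drops any cached mapping
 */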
  81
  82/*
  83 * Ring buffer.  There are two of them, sitting in the same GPADL, for each
  84 * channel.
  85 * Each ring buffer consists of a set of pages, with the first page containing
  86 * the ring buffer header, and the remaining pages being for data packets.
  87 */
  88typedef struct VMBusRingBufCommon {
  89    AddressSpace *as;
  90    /* GPA of the ring buffer header */
  91    dma_addr_t rb_addr;
  92    /* start and length of the ring buffer data area within GPADL */
  93    uint32_t base;
  94    uint32_t len;
  95
  96    GpadlIter iter;
  97} VMBusRingBufCommon;
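
/*
 * Illustrative layout sketch (for exposition only, assuming 4KiB target
 * pages): a ring buffer occupying GPADL pages [begin, end), as set up by
 * ringbuf_init_common() below, e.g. begin = 0, end = 5:
 *
 *     page 0      ring buffer header  (rb_addr = gfns[0] << TARGET_PAGE_BITS)
 *     pages 1..4  data area
 *     base        = (0 + 1) * 4KiB = 0x1000  (offset of data area within GPADL)
 *     len         = (5 - 0 - 1) * 4KiB = 0x4000
 */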
  98
  99typedef struct VMBusSendRingBuf {
 100    VMBusRingBufCommon common;
 101    /* current write index, to be committed at the end of send */
 102    uint32_t wr_idx;
 103    /* write index at the start of send */
 104    uint32_t last_wr_idx;
 105    /* space to be requested from the guest */
 106    uint32_t wanted;
 107    /* space reserved for planned sends */
 108    uint32_t reserved;
 109    /* last seen read index */
 110    uint32_t last_seen_rd_idx;
 111} VMBusSendRingBuf;
 112
 113typedef struct VMBusRecvRingBuf {
 114    VMBusRingBufCommon common;
 115    /* current read index, to be committed at the end of receive */
 116    uint32_t rd_idx;
 117    /* read index at the start of receive */
 118    uint32_t last_rd_idx;
 119    /* last seen write index */
 120    uint32_t last_seen_wr_idx;
 121} VMBusRecvRingBuf;
 122
 123
 124enum {
 125    VMOFFER_INIT,
 126    VMOFFER_SENDING,
 127    VMOFFER_SENT,
 128};
 129
 130enum {
 131    VMCHAN_INIT,
 132    VMCHAN_OPENING,
 133    VMCHAN_OPEN,
 134};
 135
 136struct VMBusChannel {
 137    VMBusDevice *dev;
 138
 139    /* channel id */
 140    uint32_t id;
 141    /*
 142     * subchannel index within the device; subchannel #0 is "primary" and
 143     * always exists
 144     */
 145    uint16_t subchan_idx;
 146    uint32_t open_id;
 147    /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
 148    uint32_t target_vp;
 149    /* GPADL id to use for the ring buffers */
 150    uint32_t ringbuf_gpadl;
 151    /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
 152    uint32_t ringbuf_send_offset;
 153
 154    uint8_t offer_state;
 155    uint8_t state;
 156    bool is_open;
 157
 158    /* main device worker; copied from the device class */
 159    VMBusChannelNotifyCb notify_cb;
 160    /*
 161     * guest->host notifications, either sent directly or dispatched via
 162     * interrupt page (older VMBus)
 163     */
 164    EventNotifier notifier;
 165
 166    VMBus *vmbus;
 167    /*
 168     * SINT route to signal with host->guest notifications; may be shared with
 169     * the main VMBus SINT route
 170     */
 171    HvSintRoute *notify_route;
 172    VMBusGpadl *gpadl;
 173
 174    VMBusSendRingBuf send_ringbuf;
 175    VMBusRecvRingBuf recv_ringbuf;
 176
 177    QTAILQ_ENTRY(VMBusChannel) link;
 178};
 179
 180/*
 181 * Hyper-V spec mandates that every message port has 16 buffers, which means
 182 * that the guest can post up to this many messages without blocking.
 183 * Therefore a queue for incoming messages has to be provided.
 184 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
 185 * doesn't transition to a new state until the message is known to have been
 186 * successfully delivered to the respective SynIC message slot.
 187 */
 188#define HV_MSG_QUEUE_LEN     16
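
/*
 * For exposition: the incoming-message queue declared in struct VMBus below is
 * a plain circular buffer; enqueue_incoming_message() computes the slot as
 *
 *     idx = (rx_queue_head + rx_queue_size) % HV_MSG_QUEUE_LEN;
 *
 * e.g. head = 14, size = 3 puts the next message into slot (14 + 3) % 16 = 1.
 */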
 189
 190/* Hyper-V devices never use channel #0.  Must be something special. */
 191#define VMBUS_FIRST_CHANID      1
 192/* Each channel occupies one bit within a single event page sint slot. */
 193#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
 194/* Leave a few connection numbers for other purposes. */
 195#define VMBUS_CHAN_CONNECTION_OFFSET     16
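
/*
 * For exposition: channel ids are allocated starting at VMBUS_FIRST_CHANID,
 * and a channel's connection id is VMBUS_CHAN_CONNECTION_OFFSET + chan->id
 * (see chan_connection_id() below), so e.g. the first channel (id 1) signals
 * the host on connection number 17.
 */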
 196
 197/*
 198 * Since the success or failure of sending a message is reported
 199 * asynchronously, the VMBus state machine has effectively two entry points:
 200 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
 201 * message delivery status becomes known).  Both are run as oneshot BHs on the
 202 * main aio context, ensuring serialization.
 203 */
 204enum {
 205    VMBUS_LISTEN,
 206    VMBUS_HANDSHAKE,
 207    VMBUS_OFFER,
 208    VMBUS_CREATE_GPADL,
 209    VMBUS_TEARDOWN_GPADL,
 210    VMBUS_OPEN_CHANNEL,
 211    VMBUS_UNLOAD,
 212    VMBUS_STATE_MAX
 213};
 214
 215struct VMBus {
 216    BusState parent;
 217
 218    uint8_t state;
 219    /* protection against recursive aio_poll (see vmbus_run) */
 220    bool in_progress;
 221    /* whether there's a message being delivered to the guest */
 222    bool msg_in_progress;
 223    uint32_t version;
 224    /* VP_INDEX of the vCPU to send messages and interrupts to */
 225    uint32_t target_vp;
 226    HvSintRoute *sint_route;
 227    /*
 228     * interrupt page for older protocol versions; newer ones use SynIC event
 229     * flags directly
 230     */
 231    hwaddr int_page_gpa;
 232
 233    DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
 234
 235    /* incoming message queue */
 236    struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
 237    uint8_t rx_queue_head;
 238    uint8_t rx_queue_size;
 239    QemuMutex rx_queue_lock;
 240
 241    QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
 242    QTAILQ_HEAD(, VMBusChannel) channel_list;
 243
 244    /*
 245     * guest->host notifications for older VMBus, to be dispatched via
 246     * interrupt page
 247     */
 248    EventNotifier notifier;
 249};
 250
 251static bool gpadl_full(VMBusGpadl *gpadl)
 252{
 253    return gpadl->seen_gfns == gpadl->num_gfns;
 254}
 255
 256static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
 257                                uint32_t child_relid, uint32_t num_gfns)
 258{
 259    VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
 260
 261    gpadl->id = id;
 262    gpadl->child_relid = child_relid;
 263    gpadl->num_gfns = num_gfns;
 264    gpadl->gfns = g_new(uint64_t, num_gfns);
 265    QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
 266    gpadl->vmbus = vmbus;
 267    gpadl->refcount = 1;
 268    return gpadl;
 269}
 270
 271static void free_gpadl(VMBusGpadl *gpadl)
 272{
 273    QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
 274    g_free(gpadl->gfns);
 275    g_free(gpadl);
 276}
 277
 278static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
 279{
 280    VMBusGpadl *gpadl;
 281    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 282        if (gpadl->id == gpadl_id) {
 283            return gpadl;
 284        }
 285    }
 286    return NULL;
 287}
 288
 289VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
 290{
 291    VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
 292    if (!gpadl || !gpadl_full(gpadl)) {
 293        return NULL;
 294    }
 295    gpadl->refcount++;
 296    return gpadl;
 297}
 298
 299void vmbus_put_gpadl(VMBusGpadl *gpadl)
 300{
 301    if (!gpadl) {
 302        return;
 303    }
 304    if (--gpadl->refcount) {
 305        return;
 306    }
 307    free_gpadl(gpadl);
 308}
 309
 310uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
 311{
 312    return gpadl->num_gfns * TARGET_PAGE_SIZE;
 313}
 314
 315static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
 316                            AddressSpace *as, DMADirection dir)
 317{
 318    iter->gpadl = gpadl;
 319    iter->as = as;
 320    iter->dir = dir;
 321    iter->active = false;
 322}
 323
 324static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
 325{
 326    uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
 327    uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
 328
  329    /* mapping is only done for a non-zero amount of i/o */
 330    assert(iter->last_off > 0);
 331    assert(map_start_in_page < io_end_in_page);
 332
 333    dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
 334                     iter->dir, io_end_in_page - map_start_in_page);
 335}
 336
 337/*
 338 * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
 339 * The direction of the copy is determined by @iter->dir.
 340 * The caller must ensure the operation overflows neither @buf nor the GPADL
 341 * (there's an assert for the latter).
 342 * Reuse the currently mapped page in the GPADL if possible.
 343 */
 344static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
 345{
 346    ssize_t ret = len;
 347
 348    assert(iter->active);
 349
 350    while (len) {
 351        uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
 352        uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
 353        uint32_t cplen = MIN(pgleft, len);
 354        void *p;
 355
 356        /* try to reuse the cached mapping */
 357        if (iter->map) {
 358            uint32_t map_start_in_page =
 359                (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
 360            uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
 361            uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
 362            if (off_base != mapped_base || off_in_page < map_start_in_page) {
 363                gpadl_iter_cache_unmap(iter);
 364                iter->map = NULL;
 365            }
 366        }
 367
 368        if (!iter->map) {
 369            dma_addr_t maddr;
 370            dma_addr_t mlen = pgleft;
 371            uint32_t idx = iter->off >> TARGET_PAGE_BITS;
 372            assert(idx < iter->gpadl->num_gfns);
 373
 374            maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
 375
 376            iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
 377                                       MEMTXATTRS_UNSPECIFIED);
 378            if (mlen != pgleft) {
 379                dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
 380                iter->map = NULL;
 381                return -EFAULT;
 382            }
 383        }
 384
 385        p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
 386                off_in_page);
 387        if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
 388            memcpy(p, buf, cplen);
 389        } else {
 390            memcpy(buf, p, cplen);
 391        }
 392
 393        buf += cplen;
 394        len -= cplen;
 395        iter->off += cplen;
 396        iter->last_off = iter->off;
 397    }
 398
 399    return ret;
 400}
 401
 402/*
 403 * Position the iterator @iter at new offset @new_off.
  404 * The cached mapping, if any, is not dropped eagerly here; it is unmapped on
  405 * the next i/o if it doesn't cover the new offset.
 406 */
 407static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
 408{
 409    assert(iter->active);
 410    iter->off = new_off;
 411}
 412
 413/*
 414 * Start a series of i/o on the GPADL.
  415 * After this, i/o and seek operations on @iter become legal.
 416 */
 417static inline void gpadl_iter_start_io(GpadlIter *iter)
 418{
 419    assert(!iter->active);
 420    /* mapping is cached lazily on i/o */
 421    iter->map = NULL;
 422    iter->active = true;
 423}
 424
 425/*
  426 * End the earlier started series of i/o on the GPADL and release the cached
 427 * mapping if any.
 428 */
 429static inline void gpadl_iter_end_io(GpadlIter *iter)
 430{
 431    assert(iter->active);
 432
 433    if (iter->map) {
 434        gpadl_iter_cache_unmap(iter);
 435    }
 436
 437    iter->active = false;
 438}
 439
 440static void vmbus_resched(VMBus *vmbus);
 441static void vmbus_msg_cb(void *data, int status);
 442
 443ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
 444                           const struct iovec *iov, size_t iov_cnt)
 445{
 446    GpadlIter iter;
 447    size_t i;
 448    ssize_t ret = 0;
 449
 450    gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
 451                    DMA_DIRECTION_FROM_DEVICE);
 452    gpadl_iter_start_io(&iter);
 453    gpadl_iter_seek(&iter, off);
 454    for (i = 0; i < iov_cnt; i++) {
 455        ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
 456        if (ret < 0) {
 457            goto out;
 458        }
 459    }
 460out:
 461    gpadl_iter_end_io(&iter);
 462    return ret;
 463}
 464
 465int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
 466                  unsigned iov_cnt, size_t len, size_t off)
 467{
 468    int ret_cnt = 0, ret;
 469    unsigned i;
 470    QEMUSGList *sgl = &req->sgl;
 471    ScatterGatherEntry *sg = sgl->sg;
 472
 473    for (i = 0; i < sgl->nsg; i++) {
 474        if (sg[i].len > off) {
 475            break;
 476        }
 477        off -= sg[i].len;
 478    }
 479    for (; len && i < sgl->nsg; i++) {
 480        dma_addr_t mlen = MIN(sg[i].len - off, len);
 481        dma_addr_t addr = sg[i].base + off;
 482        len -= mlen;
 483        off = 0;
 484
 485        for (; mlen; ret_cnt++) {
 486            dma_addr_t l = mlen;
 487            dma_addr_t a = addr;
 488
 489            if (ret_cnt == iov_cnt) {
 490                ret = -ENOBUFS;
 491                goto err;
 492            }
 493
 494            iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
 495                                                   MEMTXATTRS_UNSPECIFIED);
 496            if (!l) {
 497                ret = -EFAULT;
 498                goto err;
 499            }
 500            iov[ret_cnt].iov_len = l;
 501            addr += l;
 502            mlen -= l;
 503        }
 504    }
 505
 506    return ret_cnt;
 507err:
 508    vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
 509    return ret;
 510}
 511
 512void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
 513                     unsigned iov_cnt, size_t accessed)
 514{
 515    QEMUSGList *sgl = &req->sgl;
 516    unsigned i;
 517
 518    for (i = 0; i < iov_cnt; i++) {
 519        size_t acsd = MIN(accessed, iov[i].iov_len);
 520        dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
 521        accessed -= acsd;
 522    }
 523}
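
/*
 * Illustrative sketch (hypothetical, for exposition only) of mapping the
 * scatter-gather list of a GPA-direct request for CPU access; @req is a
 * VMBusChanReq obtained from vmbus_channel_recv_peek(), and @req_len and
 * @accessed are placeholders for device-specific lengths:
 *
 *     struct iovec iov[8];
 *     int n = vmbus_map_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, ARRAY_SIZE(iov),
 *                           req_len, 0);
 *     if (n < 0) {
 *         // -ENOBUFS: more fragments than iov slots, or -EFAULT: map failed
 *     } else {
 *         // access the guest data through iov[0..n-1], then release it
 *         vmbus_unmap_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, n, accessed);
 *     }
 */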
 524
 525static const VMStateDescription vmstate_gpadl = {
 526    .name = "vmbus/gpadl",
 527    .version_id = 0,
 528    .minimum_version_id = 0,
 529    .fields = (VMStateField[]) {
 530        VMSTATE_UINT32(id, VMBusGpadl),
 531        VMSTATE_UINT32(child_relid, VMBusGpadl),
 532        VMSTATE_UINT32(num_gfns, VMBusGpadl),
 533        VMSTATE_UINT32(seen_gfns, VMBusGpadl),
 534        VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
 535                                    vmstate_info_uint64, uint64_t),
 536        VMSTATE_UINT8(state, VMBusGpadl),
 537        VMSTATE_END_OF_LIST()
 538    }
 539};
 540
 541/*
 542 * Wrap the index into a ring buffer of @len bytes.
  543 * @idx is assumed not to exceed twice the size of the ringbuffer, so only a
  544 * single wraparound is considered.
 545 */
 546static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
 547{
 548    if (idx >= len) {
 549        idx -= len;
 550    }
 551    return idx;
 552}
 553
 554/*
 555 * Circular difference between two indices into a ring buffer of @len bytes.
  556 * @allow_catchup - whether @idx1 may catch up with @idx2; e.g. the read index
  557 * may catch up with the write index but not vice versa.
 558 */
 559static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
 560                                    bool allow_catchup)
 561{
 562    return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
 563}
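
/*
 * Worked example (for exposition only), with a ring of len = 0x1000 bytes:
 *     rb_idx_wrap(0x1200, 0x1000)                  -> 0x0200
 *     rb_idx_delta(0x0f00, 0x0100, 0x1000, true)   -> 0x0200
 *     rb_idx_delta(0x0100, 0x0100, 0x1000, true)   -> 0       (caught up: empty)
 *     rb_idx_delta(0x0100, 0x0100, 0x1000, false)  -> 0x0fff  (no catchup: full)
 */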
 564
 565static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
 566{
 567    vmbus_ring_buffer *rb;
 568    dma_addr_t mlen = sizeof(*rb);
 569
 570    rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
 571                        DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
 572    if (mlen != sizeof(*rb)) {
 573        dma_memory_unmap(ringbuf->as, rb, mlen,
 574                         DMA_DIRECTION_FROM_DEVICE, 0);
 575        return NULL;
 576    }
 577    return rb;
 578}
 579
 580static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
 581                              vmbus_ring_buffer *rb, bool dirty)
 582{
 583    assert(rb);
 584
 585    dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
 586                     dirty ? sizeof(*rb) : 0);
 587}
 588
 589static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
 590                                AddressSpace *as, DMADirection dir,
 591                                uint32_t begin, uint32_t end)
 592{
 593    ringbuf->as = as;
 594    ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
 595    ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
 596    ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
 597    gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
 598}
 599
 600static int ringbufs_init(VMBusChannel *chan)
 601{
 602    vmbus_ring_buffer *rb;
 603    VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
 604    VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
 605
 606    if (chan->ringbuf_send_offset <= 1 ||
 607        chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
 608        return -EINVAL;
 609    }
 610
 611    ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
 612                        DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
 613    ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
 614                        DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
 615                        chan->gpadl->num_gfns);
 616    send_ringbuf->wanted = 0;
 617    send_ringbuf->reserved = 0;
 618
 619    rb = ringbuf_map_hdr(&recv_ringbuf->common);
 620    if (!rb) {
 621        return -EFAULT;
 622    }
 623    recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
 624    ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
 625
 626    rb = ringbuf_map_hdr(&send_ringbuf->common);
 627    if (!rb) {
 628        return -EFAULT;
 629    }
 630    send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
 631    send_ringbuf->last_seen_rd_idx = rb->read_index;
 632    rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
 633    ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
 634
 635    if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
 636        send_ringbuf->wr_idx >= send_ringbuf->common.len) {
 637        return -EOVERFLOW;
 638    }
 639
 640    return 0;
 641}
 642
 643/*
  644 * Perform i/o between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
  645 * around if needed.
  646 * @len is assumed not to exceed the size of the ringbuffer, so only a single
  647 * wraparound is considered.
 648 */
 649static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
 650{
 651    ssize_t ret1 = 0, ret2 = 0;
 652    uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
 653
 654    if (len >= remain) {
 655        ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
 656        if (ret1 < 0) {
 657            return ret1;
 658        }
 659        gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
 660        buf += remain;
 661        len -= remain;
 662    }
 663    ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
 664    if (ret2 < 0) {
 665        return ret2;
 666    }
 667    return ret1 + ret2;
 668}
 669
 670/*
 671 * Position the circular iterator within @ringbuf to offset @new_off, wrapping
 672 * around if needed.
  673 * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
  674 * a single wraparound is considered.
 675 */
 676static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
 677{
 678    gpadl_iter_seek(&ringbuf->iter,
 679                    ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
 680}
 681
 682static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
 683{
 684    return ringbuf->iter.off - ringbuf->base;
 685}
 686
 687static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
 688{
 689    gpadl_iter_start_io(&ringbuf->iter);
 690}
 691
 692static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
 693{
 694    gpadl_iter_end_io(&ringbuf->iter);
 695}
 696
 697VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
 698{
 699    return chan->dev;
 700}
 701
 702VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
 703{
 704    if (chan_idx >= dev->num_channels) {
 705        return NULL;
 706    }
 707    return &dev->channels[chan_idx];
 708}
 709
 710uint32_t vmbus_channel_idx(VMBusChannel *chan)
 711{
 712    return chan - chan->dev->channels;
 713}
 714
 715void vmbus_channel_notify_host(VMBusChannel *chan)
 716{
 717    event_notifier_set(&chan->notifier);
 718}
 719
 720bool vmbus_channel_is_open(VMBusChannel *chan)
 721{
 722    return chan->is_open;
 723}
 724
 725/*
 726 * Notify the guest side about the data to work on in the channel ring buffer.
 727 * The notification is done by signaling a dedicated per-channel SynIC event
 728 * flag (more recent guests) or setting a bit in the interrupt page and firing
 729 * the VMBus SINT (older guests).
 730 */
 731static int vmbus_channel_notify_guest(VMBusChannel *chan)
 732{
 733    int res = 0;
 734    unsigned long *int_map, mask;
 735    unsigned idx;
 736    hwaddr addr = chan->vmbus->int_page_gpa;
 737    hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
 738
 739    trace_vmbus_channel_notify_guest(chan->id);
 740
 741    if (!addr) {
 742        return hyperv_set_event_flag(chan->notify_route, chan->id);
 743    }
 744
 745    int_map = cpu_physical_memory_map(addr, &len, 1);
 746    if (len != TARGET_PAGE_SIZE / 2) {
 747        res = -ENXIO;
 748        goto unmap;
 749    }
 750
 751    idx = BIT_WORD(chan->id);
 752    mask = BIT_MASK(chan->id);
 753    if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
 754        res = hyperv_sint_route_set_sint(chan->notify_route);
 755        dirty = len;
 756    }
 757
 758unmap:
 759    cpu_physical_memory_unmap(int_map, len, 1, dirty);
 760    return res;
 761}
 762
 763#define VMBUS_PKT_TRAILER      sizeof(uint64_t)
 764
 765static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
 766                                          uint32_t desclen, uint32_t msglen)
 767{
 768    hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
 769        DIV_ROUND_UP(desclen, sizeof(uint64_t));
 770    hdr->len_qwords = hdr->offset_qwords +
 771        DIV_ROUND_UP(msglen, sizeof(uint64_t));
 772    return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
 773}
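
/*
 * Worked example (for exposition, assuming the 16-byte vmbus_packet_hdr):
 * desclen = 40 and msglen = 100 give
 *     offset_qwords = 16/8 + DIV_ROUND_UP(40, 8)  = 2 + 5  = 7
 *     len_qwords    = 7    + DIV_ROUND_UP(100, 8) = 7 + 13 = 20
 *     return value  = 20 * 8 + VMBUS_PKT_TRAILER  = 160 + 8 = 168
 * i.e. both the descriptor and the message are padded to 8-byte boundaries and
 * the packet is followed by an 8-byte trailer.
 */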
 774
 775/*
  776 * Simplified ring buffer operation with paired barrier annotations in the
 777 * producer and consumer loops:
 778 *
 779 * producer                           * consumer
 780 * ~~~~~~~~                           * ~~~~~~~~
 781 * write pending_send_sz              * read write_index
 782 * smp_mb                       [A]   * smp_mb                       [C]
 783 * read read_index                    * read packet
 784 * smp_mb                       [B]   * read/write out-of-band data
 785 * read/write out-of-band data        * smp_mb                       [B]
 786 * write packet                       * write read_index
 787 * smp_mb                       [C]   * smp_mb                       [A]
 788 * write write_index                  * read pending_send_sz
 789 * smp_wmb                      [D]   * smp_rmb                      [D]
 790 * write pending_send_sz              * read write_index
 791 * ...                                * ...
 792 */
 793
 794static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
 795{
 796    /* don't trust guest data */
 797    if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
 798        return 0;
 799    }
 800    return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
 801                        ringbuf->common.len, false);
 802}
 803
 804static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
 805{
 806    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 807    vmbus_ring_buffer *rb;
 808    uint32_t written;
 809
 810    written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
 811                           ringbuf->common.len, true);
 812    if (!written) {
 813        return 0;
 814    }
 815
 816    rb = ringbuf_map_hdr(&ringbuf->common);
 817    if (!rb) {
 818        return -EFAULT;
 819    }
 820
 821    ringbuf->reserved -= written;
 822
 823    /* prevent reorder with the data operation and packet write */
 824    smp_mb();                   /* barrier pair [C] */
 825    rb->write_index = ringbuf->wr_idx;
 826
 827    /*
 828     * If the producer earlier indicated that it wants to be notified when the
  829     * consumer frees a certain amount of space in the ring buffer, that amount
 830     * is reduced by the size of the completed write.
 831     */
 832    if (ringbuf->wanted) {
 833        /* otherwise reservation would fail */
 834        assert(ringbuf->wanted < written);
 835        ringbuf->wanted -= written;
 836        /* prevent reorder with write_index write */
 837        smp_wmb();              /* barrier pair [D] */
 838        rb->pending_send_sz = ringbuf->wanted;
 839    }
 840
 841    /* prevent reorder with write_index or pending_send_sz write */
 842    smp_mb();                   /* barrier pair [A] */
 843    ringbuf->last_seen_rd_idx = rb->read_index;
 844
 845    /*
 846     * The consumer may have missed the reduction of pending_send_sz and skip
 847     * notification, so re-check the blocking condition, and, if it's no longer
  848     * true, ensure processing another iteration by simulating the consumer's
 849     * notification.
 850     */
 851    if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
 852        vmbus_channel_notify_host(chan);
 853    }
 854
 855    /* skip notification by consumer's request */
 856    if (rb->interrupt_mask) {
 857        goto out;
 858    }
 859
 860    /*
 861     * The consumer hasn't caught up with the producer's previous state so it's
 862     * not blocked.
 863     * (last_seen_rd_idx comes from the guest but it's safe to use w/o
 864     * validation here as it only affects notification.)
 865     */
 866    if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
 867                     ringbuf->common.len, true) > written) {
 868        goto out;
 869    }
 870
 871    vmbus_channel_notify_guest(chan);
 872out:
 873    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
 874    ringbuf->last_wr_idx = ringbuf->wr_idx;
 875    return written;
 876}
 877
 878int vmbus_channel_reserve(VMBusChannel *chan,
 879                          uint32_t desclen, uint32_t msglen)
 880{
 881    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 882    vmbus_ring_buffer *rb = NULL;
 883    vmbus_packet_hdr hdr;
 884    uint32_t needed = ringbuf->reserved +
 885        vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
 886
 887    /* avoid touching the guest memory if possible */
 888    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
 889        goto success;
 890    }
 891
 892    rb = ringbuf_map_hdr(&ringbuf->common);
 893    if (!rb) {
 894        return -EFAULT;
 895    }
 896
 897    /* fetch read index from guest memory and try again */
 898    ringbuf->last_seen_rd_idx = rb->read_index;
 899
 900    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
 901        goto success;
 902    }
 903
 904    rb->pending_send_sz = needed;
 905
 906    /*
 907     * The consumer may have made progress and freed up some space before
 908     * seeing updated pending_send_sz, so re-read read_index (preventing
 909     * reorder with the pending_send_sz write) and try again.
 910     */
 911    smp_mb();                   /* barrier pair [A] */
 912    ringbuf->last_seen_rd_idx = rb->read_index;
 913
 914    if (needed > ringbuf_send_avail(ringbuf)) {
 915        goto out;
 916    }
 917
 918success:
 919    ringbuf->reserved = needed;
 920    needed = 0;
 921
 922    /* clear pending_send_sz if it was set */
 923    if (ringbuf->wanted) {
 924        if (!rb) {
 925            rb = ringbuf_map_hdr(&ringbuf->common);
 926            if (!rb) {
 927                /* failure to clear pending_send_sz is non-fatal */
 928                goto out;
 929            }
 930        }
 931
 932        rb->pending_send_sz = 0;
 933    }
 934
 935    /* prevent reorder of the following data operation with read_index read */
 936    smp_mb();                   /* barrier pair [B] */
 937
 938out:
 939    if (rb) {
 940        ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
 941    }
 942    ringbuf->wanted = needed;
 943    return needed ? -ENOSPC : 0;
 944}
 945
 946ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
 947                           void *desc, uint32_t desclen,
 948                           void *msg, uint32_t msglen,
 949                           bool need_comp, uint64_t transaction_id)
 950{
 951    ssize_t ret = 0;
 952    vmbus_packet_hdr hdr;
 953    uint32_t totlen;
 954    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 955
 956    if (!vmbus_channel_is_open(chan)) {
 957        return -EINVAL;
 958    }
 959
 960    totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
 961    hdr.type = pkt_type;
 962    hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
 963    hdr.transaction_id = transaction_id;
 964
 965    assert(totlen <= ringbuf->reserved);
 966
 967    ringbuf_start_io(&ringbuf->common);
 968    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
 969    ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
 970    if (ret < 0) {
 971        goto out;
 972    }
 973    if (desclen) {
 974        assert(desc);
 975        ret = ringbuf_io(&ringbuf->common, desc, desclen);
 976        if (ret < 0) {
 977            goto out;
 978        }
 979        ringbuf_seek(&ringbuf->common,
 980                     ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
 981    }
 982    ret = ringbuf_io(&ringbuf->common, msg, msglen);
 983    if (ret < 0) {
 984        goto out;
 985    }
 986    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
 987    ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
 988    ret = 0;
 989out:
 990    ringbuf_end_io(&ringbuf->common);
 991    if (ret) {
 992        return ret;
 993    }
 994    return ringbuf_send_update_idx(chan);
 995}
 996
 997ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
 998                                      void *msg, uint32_t msglen)
 999{
1000    assert(req->need_comp);
1001    return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
1002                              msg, msglen, false, req->transaction_id);
1003}
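
/*
 * Illustrative sketch (hypothetical, for exposition only) of the send path as
 * a device might use it; @chan is an open channel, @msg/@msglen a
 * device-defined message and @tx_id a placeholder transaction id:
 *
 *     if (vmbus_channel_reserve(chan, 0, msglen) < 0) {
 *         // -ENOSPC: no room yet; pending_send_sz has been set so the guest
 *         // will notify the host once enough space is freed, or -EFAULT
 *     } else {
 *         vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, NULL, 0,
 *                            msg, msglen, false, tx_id);
 *     }
 */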
1004
1005static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
1006                               VMBusRingBufCommon *ringbuf, uint32_t len)
1007{
1008    int ret;
1009    vmbus_pkt_gpa_direct hdr;
1010    hwaddr curaddr = 0;
1011    hwaddr curlen = 0;
1012    int num;
1013
1014    if (len < sizeof(hdr)) {
1015        return -EIO;
1016    }
1017    ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
1018    if (ret < 0) {
1019        return ret;
1020    }
1021    len -= sizeof(hdr);
1022
1023    num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
1024    if (num < 0) {
1025        return -EIO;
1026    }
1027    qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
1028
1029    for (; hdr.rangecount; hdr.rangecount--) {
1030        vmbus_gpa_range range;
1031
1032        if (len < sizeof(range)) {
1033            goto eio;
1034        }
1035        ret = ringbuf_io(ringbuf, &range, sizeof(range));
1036        if (ret < 0) {
1037            goto err;
1038        }
1039        len -= sizeof(range);
1040
1041        if (range.byte_offset & TARGET_PAGE_MASK) {
1042            goto eio;
1043        }
1044
1045        for (; range.byte_count; range.byte_offset = 0) {
1046            uint64_t paddr;
1047            uint32_t plen = MIN(range.byte_count,
1048                                TARGET_PAGE_SIZE - range.byte_offset);
1049
1050            if (len < sizeof(uint64_t)) {
1051                goto eio;
1052            }
1053            ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
1054            if (ret < 0) {
1055                goto err;
1056            }
1057            len -= sizeof(uint64_t);
1058            paddr <<= TARGET_PAGE_BITS;
1059            paddr |= range.byte_offset;
1060            range.byte_count -= plen;
1061
1062            if (curaddr + curlen == paddr) {
1063                /* consecutive fragments - join */
1064                curlen += plen;
1065            } else {
1066                if (curlen) {
1067                    qemu_sglist_add(sgl, curaddr, curlen);
1068                }
1069
1070                curaddr = paddr;
1071                curlen = plen;
1072            }
1073        }
1074    }
1075
1076    if (curlen) {
1077        qemu_sglist_add(sgl, curaddr, curlen);
1078    }
1079
1080    return 0;
1081eio:
1082    ret = -EIO;
1083err:
1084    qemu_sglist_destroy(sgl);
1085    return ret;
1086}
1087
1088static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
1089                                     uint32_t size, uint16_t pkt_type,
1090                                     uint32_t msglen, uint64_t transaction_id,
1091                                     bool need_comp)
1092{
1093    VMBusChanReq *req;
1094    uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
1095    uint32_t totlen = msgoff + msglen;
1096
1097    req = g_malloc0(totlen);
1098    req->chan = chan;
1099    req->pkt_type = pkt_type;
1100    req->msg = (void *)req + msgoff;
1101    req->msglen = msglen;
1102    req->transaction_id = transaction_id;
1103    req->need_comp = need_comp;
1104    return req;
1105}
1106
1107int vmbus_channel_recv_start(VMBusChannel *chan)
1108{
1109    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1110    vmbus_ring_buffer *rb;
1111
1112    rb = ringbuf_map_hdr(&ringbuf->common);
1113    if (!rb) {
1114        return -EFAULT;
1115    }
1116    ringbuf->last_seen_wr_idx = rb->write_index;
1117    ringbuf_unmap_hdr(&ringbuf->common, rb, false);
1118
1119    if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
1120        return -EOVERFLOW;
1121    }
1122
1123    /* prevent reorder of the following data operation with write_index read */
1124    smp_mb();                   /* barrier pair [C] */
1125    return 0;
1126}
1127
1128void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
1129{
1130    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1131    vmbus_packet_hdr hdr = {};
1132    VMBusChanReq *req;
1133    uint32_t avail;
1134    uint32_t totlen, pktlen, msglen, msgoff, desclen;
1135
1136    assert(size >= sizeof(*req));
1137
1138    /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
1139    avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
1140                         ringbuf->common.len, true);
1141    if (avail < sizeof(hdr)) {
1142        return NULL;
1143    }
1144
1145    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
1146    if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
1147        return NULL;
1148    }
1149
1150    pktlen = hdr.len_qwords * sizeof(uint64_t);
1151    totlen = pktlen + VMBUS_PKT_TRAILER;
1152    if (totlen > avail) {
1153        return NULL;
1154    }
1155
1156    msgoff = hdr.offset_qwords * sizeof(uint64_t);
1157    if (msgoff > pktlen || msgoff < sizeof(hdr)) {
1158        error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
1159        return NULL;
1160    }
1161
1162    msglen = pktlen - msgoff;
1163
1164    req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
1165                          hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
1166
1167    switch (hdr.type) {
1168    case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
1169        desclen = msgoff - sizeof(hdr);
1170        if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
1171                                desclen) < 0) {
1172            error_report("%s: failed to convert GPA ranges to SGL", __func__);
1173            goto free_req;
1174        }
1175        break;
1176    case VMBUS_PACKET_DATA_INBAND:
1177    case VMBUS_PACKET_COMP:
1178        break;
1179    default:
1180        error_report("%s: unexpected msg type: %x", __func__, hdr.type);
1181        goto free_req;
1182    }
1183
1184    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
1185    if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
1186        goto free_req;
1187    }
1188    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
1189
1190    return req;
1191free_req:
1192    vmbus_free_req(req);
1193    return NULL;
1194}
1195
1196void vmbus_channel_recv_pop(VMBusChannel *chan)
1197{
1198    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1199    ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
1200}
1201
1202ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
1203{
1204    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1205    vmbus_ring_buffer *rb;
1206    uint32_t read;
1207
1208    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
1209                        ringbuf->common.len, true);
1210    if (!read) {
1211        return 0;
1212    }
1213
1214    rb = ringbuf_map_hdr(&ringbuf->common);
1215    if (!rb) {
1216        return -EFAULT;
1217    }
1218
1219    /* prevent reorder with the data operation and packet read */
1220    smp_mb();                   /* barrier pair [B] */
1221    rb->read_index = ringbuf->rd_idx;
1222
1223    /* prevent reorder of the following pending_send_sz read */
1224    smp_mb();                   /* barrier pair [A] */
1225
1226    if (rb->interrupt_mask) {
1227        goto out;
1228    }
1229
1230    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
1231        uint32_t wr_idx, wr_avail;
1232        uint32_t wanted = rb->pending_send_sz;
1233
1234        if (!wanted) {
1235            goto out;
1236        }
1237
1238        /* prevent reorder with pending_send_sz read */
1239        smp_rmb();              /* barrier pair [D] */
1240        wr_idx = rb->write_index;
1241
1242        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
1243                                true);
1244
1245        /* the producer wasn't blocked on the consumer state */
1246        if (wr_avail >= read + wanted) {
1247            goto out;
1248        }
1249        /* there's not enough space for the producer to make progress */
1250        if (wr_avail < wanted) {
1251            goto out;
1252        }
1253    }
1254
1255    vmbus_channel_notify_guest(chan);
1256out:
1257    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
1258    ringbuf->last_rd_idx = ringbuf->rd_idx;
1259    return read;
1260}
1261
1262void vmbus_free_req(void *req)
1263{
1264    VMBusChanReq *r = req;
1265
1266    if (!req) {
1267        return;
1268    }
1269
1270    if (r->sgl.dev) {
1271        qemu_sglist_destroy(&r->sgl);
1272    }
1273    g_free(req);
1274}
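
/*
 * Illustrative sketch (hypothetical, for exposition only) of the receive loop
 * a device's chan_notify_cb might run; MyDevReq is a placeholder request type
 * embedding VMBusChanReq as its first member:
 *
 *     MyDevReq *req;
 *
 *     if (vmbus_channel_recv_start(chan)) {
 *         return;
 *     }
 *     while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
 *         vmbus_channel_recv_pop(chan);
 *         // process req->msg / req->sgl; if req->need_comp, reply later with
 *         // vmbus_channel_send_completion()
 *         vmbus_free_req(req);
 *     }
 *     vmbus_channel_recv_done(chan);
 */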
1275
1276static const VMStateDescription vmstate_sgent = {
1277    .name = "vmbus/sgentry",
1278    .version_id = 0,
1279    .minimum_version_id = 0,
1280    .fields = (VMStateField[]) {
1281        VMSTATE_UINT64(base, ScatterGatherEntry),
1282        VMSTATE_UINT64(len, ScatterGatherEntry),
1283        VMSTATE_END_OF_LIST()
1284    }
1285};
1286
1287typedef struct VMBusChanReqSave {
1288    uint16_t chan_idx;
1289    uint16_t pkt_type;
1290    uint32_t msglen;
1291    void *msg;
1292    uint64_t transaction_id;
1293    bool need_comp;
1294    uint32_t num;
1295    ScatterGatherEntry *sgl;
1296} VMBusChanReqSave;
1297
1298static const VMStateDescription vmstate_vmbus_chan_req = {
1299    .name = "vmbus/vmbus_chan_req",
1300    .version_id = 0,
1301    .minimum_version_id = 0,
1302    .fields = (VMStateField[]) {
1303        VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
1304        VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
1305        VMSTATE_UINT32(msglen, VMBusChanReqSave),
1306        VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
1307        VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
1308        VMSTATE_BOOL(need_comp, VMBusChanReqSave),
1309        VMSTATE_UINT32(num, VMBusChanReqSave),
1310        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
1311                                             vmstate_sgent, ScatterGatherEntry),
1312        VMSTATE_END_OF_LIST()
1313    }
1314};
1315
1316void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
1317{
1318    VMBusChanReqSave req_save;
1319
1320    req_save.chan_idx = req->chan->subchan_idx;
1321    req_save.pkt_type = req->pkt_type;
1322    req_save.msglen = req->msglen;
1323    req_save.msg = req->msg;
1324    req_save.transaction_id = req->transaction_id;
1325    req_save.need_comp = req->need_comp;
1326    req_save.num = req->sgl.nsg;
1327    req_save.sgl = g_memdup(req->sgl.sg,
1328                            req_save.num * sizeof(ScatterGatherEntry));
1329
1330    vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
1331
1332    g_free(req_save.sgl);
1333}
1334
1335void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
1336{
1337    VMBusChanReqSave req_save;
1338    VMBusChanReq *req = NULL;
1339    VMBusChannel *chan = NULL;
1340    uint32_t i;
1341
1342    vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
1343
1344    if (req_save.chan_idx >= dev->num_channels) {
1345        error_report("%s: %u(chan_idx) > %u(num_channels)", __func__,
1346                     req_save.chan_idx, dev->num_channels);
1347        goto out;
1348    }
1349    chan = &dev->channels[req_save.chan_idx];
1350
1351    if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
1352        goto out;
1353    }
1354
1355    req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
1356                          req_save.transaction_id, req_save.need_comp);
1357    if (req_save.msglen) {
1358        memcpy(req->msg, req_save.msg, req_save.msglen);
1359    }
1360
1361    for (i = 0; i < req_save.num; i++) {
1362        qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
1363    }
1364
1365out:
1366    if (req_save.msglen) {
1367        g_free(req_save.msg);
1368    }
1369    if (req_save.num) {
1370        g_free(req_save.sgl);
1371    }
1372    return req;
1373}
1374
1375static void channel_event_cb(EventNotifier *e)
1376{
1377    VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
1378    if (event_notifier_test_and_clear(e)) {
1379        /*
1380         * All receives are supposed to happen within the device worker, so
1381         * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
1382         * potentially reuse the cached mapping throughout the worker.
1383         * Can't do this for sends as they may happen outside the device
1384         * worker.
1385         */
1386        VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
1387        ringbuf_start_io(&ringbuf->common);
1388        chan->notify_cb(chan);
1389        ringbuf_end_io(&ringbuf->common);
1390
1391    }
1392}
1393
1394static int alloc_chan_id(VMBus *vmbus)
1395{
1396    int ret;
1397
1398    ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
1399    if (ret == VMBUS_CHANID_COUNT) {
1400        return -ENOMEM;
1401    }
1402    return ret + VMBUS_FIRST_CHANID;
1403}
1404
1405static int register_chan_id(VMBusChannel *chan)
1406{
1407    return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
1408                            chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
1409}
1410
1411static void unregister_chan_id(VMBusChannel *chan)
1412{
1413    clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
1414}
1415
1416static uint32_t chan_connection_id(VMBusChannel *chan)
1417{
1418    return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
1419}
1420
1421static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
1422                         VMBusChannel *chan, uint16_t idx, Error **errp)
1423{
1424    int res;
1425
1426    chan->dev = dev;
1427    chan->notify_cb = vdc->chan_notify_cb;
1428    chan->subchan_idx = idx;
1429    chan->vmbus = vmbus;
1430
1431    res = alloc_chan_id(vmbus);
1432    if (res < 0) {
1433        error_setg(errp, "no spare channel id");
1434        return;
1435    }
1436    chan->id = res;
1437    register_chan_id(chan);
1438
1439    /*
1440     * The guest drivers depend on the device subchannels (idx #1+) to be
1441     * offered after the primary channel (idx #0) of that device.  To ensure
1442     * that, record the channels on the channel list in the order they appear
1443     * within the device.
1444     */
1445    QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
1446}
1447
1448static void deinit_channel(VMBusChannel *chan)
1449{
1450    assert(chan->state == VMCHAN_INIT);
1451    QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
1452    unregister_chan_id(chan);
1453}
1454
1455static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
1456{
1457    uint16_t i;
1458    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
1459    Error *err = NULL;
1460
1461    dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
1462    if (dev->num_channels < 1) {
1463        error_setg(errp, "invalid #channels: %u", dev->num_channels);
1464        return;
1465    }
1466
1467    dev->channels = g_new0(VMBusChannel, dev->num_channels);
1468    for (i = 0; i < dev->num_channels; i++) {
1469        init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
1470        if (err) {
1471            goto err_init;
1472        }
1473    }
1474
1475    return;
1476
1477err_init:
1478    while (i--) {
1479        deinit_channel(&dev->channels[i]);
1480    }
1481    error_propagate(errp, err);
1482}
1483
1484static void free_channels(VMBusDevice *dev)
1485{
1486    uint16_t i;
1487    for (i = 0; i < dev->num_channels; i++) {
1488        deinit_channel(&dev->channels[i]);
1489    }
1490    g_free(dev->channels);
1491}
1492
1493static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
1494{
1495    VMBusChannel *chan;
1496
1497    if (vp_index == vmbus->target_vp) {
1498        hyperv_sint_route_ref(vmbus->sint_route);
1499        return vmbus->sint_route;
1500    }
1501
1502    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1503        if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
1504            hyperv_sint_route_ref(chan->notify_route);
1505            return chan->notify_route;
1506        }
1507    }
1508
1509    return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
1510}
1511
1512static void open_channel(VMBusChannel *chan)
1513{
1514    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1515
1516    chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
1517    if (!chan->gpadl) {
1518        return;
1519    }
1520
1521    if (ringbufs_init(chan)) {
1522        goto put_gpadl;
1523    }
1524
1525    if (event_notifier_init(&chan->notifier, 0)) {
1526        goto put_gpadl;
1527    }
1528
1529    event_notifier_set_handler(&chan->notifier, channel_event_cb);
1530
1531    if (hyperv_set_event_flag_handler(chan_connection_id(chan),
1532                                      &chan->notifier)) {
1533        goto cleanup_notifier;
1534    }
1535
1536    chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
1537    if (!chan->notify_route) {
1538        goto clear_event_flag_handler;
1539    }
1540
1541    if (vdc->open_channel && vdc->open_channel(chan)) {
1542        goto unref_sint_route;
1543    }
1544
1545    chan->is_open = true;
1546    return;
1547
1548unref_sint_route:
1549    hyperv_sint_route_unref(chan->notify_route);
1550clear_event_flag_handler:
1551    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1552cleanup_notifier:
1553    event_notifier_set_handler(&chan->notifier, NULL);
1554    event_notifier_cleanup(&chan->notifier);
1555put_gpadl:
1556    vmbus_put_gpadl(chan->gpadl);
1557}
1558
1559static void close_channel(VMBusChannel *chan)
1560{
1561    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1562
1563    if (!chan->is_open) {
1564        return;
1565    }
1566
1567    if (vdc->close_channel) {
1568        vdc->close_channel(chan);
1569    }
1570
1571    hyperv_sint_route_unref(chan->notify_route);
1572    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
1573    event_notifier_set_handler(&chan->notifier, NULL);
1574    event_notifier_cleanup(&chan->notifier);
1575    vmbus_put_gpadl(chan->gpadl);
1576    chan->is_open = false;
1577}
1578
1579static int channel_post_load(void *opaque, int version_id)
1580{
1581    VMBusChannel *chan = opaque;
1582
1583    return register_chan_id(chan);
1584}
1585
1586static const VMStateDescription vmstate_channel = {
1587    .name = "vmbus/channel",
1588    .version_id = 0,
1589    .minimum_version_id = 0,
1590    .post_load = channel_post_load,
1591    .fields = (VMStateField[]) {
1592        VMSTATE_UINT32(id, VMBusChannel),
1593        VMSTATE_UINT16(subchan_idx, VMBusChannel),
1594        VMSTATE_UINT32(open_id, VMBusChannel),
1595        VMSTATE_UINT32(target_vp, VMBusChannel),
1596        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
1597        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
1598        VMSTATE_UINT8(offer_state, VMBusChannel),
1599        VMSTATE_UINT8(state, VMBusChannel),
1600        VMSTATE_END_OF_LIST()
1601    }
1602};
1603
1604static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
1605{
1606    VMBusChannel *chan;
1607    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1608        if (chan->id == id) {
1609            return chan;
1610        }
1611    }
1612    return NULL;
1613}
1614
1615static int enqueue_incoming_message(VMBus *vmbus,
1616                                    const struct hyperv_post_message_input *msg)
1617{
1618    int ret = 0;
1619    uint8_t idx, prev_size;
1620
1621    qemu_mutex_lock(&vmbus->rx_queue_lock);
1622
1623    if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
1624        ret = -ENOBUFS;
1625        goto out;
1626    }
1627
1628    prev_size = vmbus->rx_queue_size;
1629    idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
1630    memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
1631    vmbus->rx_queue_size++;
1632
1633    /* only need to resched if the queue was empty before */
1634    if (!prev_size) {
1635        vmbus_resched(vmbus);
1636    }
1637out:
1638    qemu_mutex_unlock(&vmbus->rx_queue_lock);
1639    return ret;
1640}
1641
1642static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
1643                                   void *data)
1644{
1645    VMBus *vmbus = data;
1646    struct vmbus_message_header *vmbus_msg;
1647
1648    if (msg->message_type != HV_MESSAGE_VMBUS) {
1649        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1650    }
1651
1652    if (msg->payload_size < sizeof(struct vmbus_message_header)) {
1653        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1654    }
1655
1656    vmbus_msg = (struct vmbus_message_header *)msg->payload;
1657
1658    trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
1659
1660    if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
1661        vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
1662        error_report("vmbus: unknown message type %#x",
1663                     vmbus_msg->message_type);
1664        return HV_STATUS_INVALID_HYPERCALL_INPUT;
1665    }
1666
1667    if (enqueue_incoming_message(vmbus, msg)) {
1668        return HV_STATUS_INSUFFICIENT_BUFFERS;
1669    }
1670    return HV_STATUS_SUCCESS;
1671}
1672
1673static bool vmbus_initialized(VMBus *vmbus)
1674{
1675    return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
1676}
1677
1678static void vmbus_reset_all(VMBus *vmbus)
1679{
1680    qbus_reset_all(BUS(vmbus));
1681}
1682
1683static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
1684{
1685    int ret;
1686    struct hyperv_message msg = {
1687        .header.message_type = HV_MESSAGE_VMBUS,
1688    };
1689
1690    assert(!vmbus->msg_in_progress);
1691    assert(msglen <= sizeof(msg.payload));
1692    assert(msglen >= sizeof(struct vmbus_message_header));
1693
1694    vmbus->msg_in_progress = true;
1695
1696    trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
1697                         msglen);
1698
1699    memcpy(msg.payload, msgdata, msglen);
1700    msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
1701
1702    ret = hyperv_post_msg(vmbus->sint_route, &msg);
1703    if (ret == 0 || ret == -EAGAIN) {
1704        return;
1705    }
1706
1707    error_report("message delivery fatal failure: %d; aborting vmbus", ret);
1708    vmbus_reset_all(vmbus);
1709}
1710
1711static int vmbus_init(VMBus *vmbus)
1712{
1713    if (vmbus->target_vp != (uint32_t)-1) {
1714        vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
1715                                                  vmbus_msg_cb, vmbus);
1716        if (!vmbus->sint_route) {
1717            error_report("failed to set up SINT route");
1718            return -ENOMEM;
1719        }
1720    }
1721    return 0;
1722}
1723
1724static void vmbus_deinit(VMBus *vmbus)
1725{
1726    VMBusGpadl *gpadl, *tmp_gpadl;
1727    VMBusChannel *chan;
1728
1729    QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
1730        if (gpadl->state == VMGPADL_TORNDOWN) {
1731            continue;
1732        }
1733        vmbus_put_gpadl(gpadl);
1734    }
1735
1736    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1737        chan->offer_state = VMOFFER_INIT;
1738    }
1739
1740    hyperv_sint_route_unref(vmbus->sint_route);
1741    vmbus->sint_route = NULL;
1742    vmbus->int_page_gpa = 0;
1743    vmbus->target_vp = (uint32_t)-1;
1744    vmbus->version = 0;
1745    vmbus->state = VMBUS_LISTEN;
1746    vmbus->msg_in_progress = false;
1747}
1748
1749static void handle_initiate_contact(VMBus *vmbus,
1750                                    vmbus_message_initiate_contact *msg,
1751                                    uint32_t msglen)
1752{
1753    if (msglen < sizeof(*msg)) {
1754        return;
1755    }
1756
1757    trace_vmbus_initiate_contact(msg->version_requested >> 16,
1758                                 msg->version_requested & 0xffff,
1759                                 msg->target_vcpu, msg->monitor_page1,
1760                                 msg->monitor_page2, msg->interrupt_page);
1761
1762    /*
1763     * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
1764     * Useful, in particular, with a vmbus-aware BIOS which can't shut vmbus
1765     * down before handing over to the OS loader.
1766     */
1767    vmbus_reset_all(vmbus);
1768
1769    vmbus->target_vp = msg->target_vcpu;
1770    vmbus->version = msg->version_requested;
1771    if (vmbus->version < VMBUS_VERSION_WIN8) {
1772        /* Linux passes the interrupt page even when it doesn't need it */
1773        vmbus->int_page_gpa = msg->interrupt_page;
1774    }
1775    vmbus->state = VMBUS_HANDSHAKE;
1776
1777    if (vmbus_init(vmbus)) {
1778        error_report("failed to init vmbus; aborting");
1779        vmbus_deinit(vmbus);
1780        return;
1781    }
1782}
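
/*
 * Illustration (assuming the conventional Hyper-V major/minor packing that
 * the trace call above relies on): a guest requesting protocol 2.4 passes
 *
 *     uint32_t version_requested = (2 << 16) | 4;   // 0x00020004
 *     uint16_t major = version_requested >> 16;     // 2
 *     uint16_t minor = version_requested & 0xffff;  // 4
 *
 * vmbus->version keeps the raw value; vmbus_initialized() only checks that
 * it is non-zero and not above VMBUS_VERSION_CURRENT.
 */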
1783
1784static void send_handshake(VMBus *vmbus)
1785{
1786    struct vmbus_message_version_response msg = {
1787        .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
1788        .version_supported = vmbus_initialized(vmbus),
1789    };
1790
1791    post_msg(vmbus, &msg, sizeof(msg));
1792}
1793
1794static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
1795{
1796    VMBusChannel *chan;
1797
1798    if (!vmbus_initialized(vmbus)) {
1799        return;
1800    }
1801
1802    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1803        if (chan->offer_state == VMOFFER_INIT) {
1804            chan->offer_state = VMOFFER_SENDING;
1805            break;
1806        }
1807    }
1808
1809    vmbus->state = VMBUS_OFFER;
1810}
1811
1812static void send_offer(VMBus *vmbus)
1813{
1814    VMBusChannel *chan;
1815    struct vmbus_message_header alloffers_msg = {
1816        .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
1817    };
1818
1819    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1820        if (chan->offer_state == VMOFFER_SENDING) {
1821            VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
1822            /* Hyper-V wants LE GUIDs */
1823            QemuUUID classid = qemu_uuid_bswap(vdc->classid);
1824            QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
1825            struct vmbus_message_offer_channel msg = {
1826                .header.message_type = VMBUS_MSG_OFFERCHANNEL,
1827                .child_relid = chan->id,
1828                .connection_id = chan_connection_id(chan),
1829                .channel_flags = vdc->channel_flags,
1830                .mmio_size_mb = vdc->mmio_size_mb,
1831                .sub_channel_index = vmbus_channel_idx(chan),
1832                .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
1833            };
1834
1835            memcpy(msg.type_uuid, &classid, sizeof(classid));
1836            memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
1837
1838            trace_vmbus_send_offer(chan->id, chan->dev);
1839
1840            post_msg(vmbus, &msg, sizeof(msg));
1841            return;
1842        }
1843    }
1844
1845    /* no more offers, send terminator message */
1846    trace_vmbus_terminate_offers();
1847    post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
1848}
1849
1850static bool complete_offer(VMBus *vmbus)
1851{
1852    VMBusChannel *chan;
1853
1854    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1855        if (chan->offer_state == VMOFFER_SENDING) {
1856            chan->offer_state = VMOFFER_SENT;
1857            goto next_offer;
1858        }
1859    }
1860    /*
1861     * no transitioning channels were found, so this completes the terminator
1862     * message and vmbus can move on to the next state
1863     */
1864    return true;
1865
1866next_offer:
1867    /* try to mark another channel for offering */
1868    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
1869        if (chan->offer_state == VMOFFER_INIT) {
1870            chan->offer_state = VMOFFER_SENDING;
1871            break;
1872        }
1873    }
1874    /*
1875     * an offer has just been sent, so there are more offers or the terminator
1876     * still to send; vmbus stays in its current state
1877     */
1878    return false;
1879}
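
/*
 * Offer flow sketch (a summary of the handlers above, not new behaviour):
 *
 *   VMOFFER_INIT     --REQUESTOFFERS / previous offer done--> VMOFFER_SENDING
 *   VMOFFER_SENDING  --OFFERCHANNEL delivered--------------> VMOFFER_SENT
 *
 * Channels are offered one at a time; when no channel is left in
 * VMOFFER_SENDING, send_offer() posts ALLOFFERS_DELIVERED and
 * complete_offer() returns true, so vmbus drops back to VMBUS_LISTEN.
 */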
1880
1881
1882static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
1883                                uint32_t msglen)
1884{
1885    VMBusGpadl *gpadl;
1886    uint32_t num_gfns, i;
1887
1888    /* must include at least one gpa range */
1889    if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
1890        !vmbus_initialized(vmbus)) {
1891        return;
1892    }
1893
1894    num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
1895               sizeof(msg->range[0].pfn_array[0]);
1896
1897    trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
1898
1899    /*
1900     * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
1901     * ranges each with arbitrary size and alignment.  However in practice only
1902     * single-range page-aligned GPADLs have been observed so just ignore
1903     * anything else and simplify things greatly.
1904     */
1905    if (msg->rangecount != 1 || msg->range[0].byte_offset ||
1906        (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
1907        return;
1908    }
1909
1910    /* ignore requests to create already existing GPADLs */
1911    if (find_gpadl(vmbus, msg->gpadl_id)) {
1912        return;
1913    }
1914
1915    gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
1916
1917    for (i = 0; i < num_gfns &&
1918         (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
1919         i++) {
1920        gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
1921    }
1922
1923    if (gpadl_full(gpadl)) {
1924        vmbus->state = VMBUS_CREATE_GPADL;
1925    }
1926}
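
/*
 * Worked example for the num_gfns computation above (illustrative only;
 * actual sizes depend on the vmbus_gpa_range layout): for a single range
 * covering eight pages the guest sends
 *
 *     rangecount   = 1;
 *     range_buflen = sizeof(msg->range[0]) + 8 * sizeof(uint64_t);
 *
 * so num_gfns == 8, and with 4KiB target pages the byte_count check
 * expects 8 << TARGET_PAGE_BITS == 32KiB.
 */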
1927
1928static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
1929                              uint32_t msglen)
1930{
1931    VMBusGpadl *gpadl;
1932    uint32_t num_gfns_left, i;
1933
1934    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
1935        return;
1936    }
1937
1938    trace_vmbus_gpadl_body(msg->gpadl_id);
1939
1940    gpadl = find_gpadl(vmbus, msg->gpadl_id);
1941    if (!gpadl) {
1942        return;
1943    }
1944
1945    num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
1946    assert(num_gfns_left);
1947
1948    for (i = 0; i < num_gfns_left &&
1949         (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
1950        gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
1951    }
1952
1953    if (gpadl_full(gpadl)) {
1954        vmbus->state = VMBUS_CREATE_GPADL;
1955    }
1956}
1957
1958static void send_create_gpadl(VMBus *vmbus)
1959{
1960    VMBusGpadl *gpadl;
1961
1962    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1963        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1964            struct vmbus_message_gpadl_created msg = {
1965                .header.message_type = VMBUS_MSG_GPADL_CREATED,
1966                .gpadl_id = gpadl->id,
1967                .child_relid = gpadl->child_relid,
1968            };
1969
1970            trace_vmbus_gpadl_created(gpadl->id);
1971            post_msg(vmbus, &msg, sizeof(msg));
1972            return;
1973        }
1974    }
1975
1976    assert(false);
1977}
1978
1979static bool complete_create_gpadl(VMBus *vmbus)
1980{
1981    VMBusGpadl *gpadl;
1982
1983    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
1984        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
1985            gpadl->state = VMGPADL_ALIVE;
1986
1987            return true;
1988        }
1989    }
1990
1991    assert(false);
1992    return false;
1993}
1994
1995static void handle_gpadl_teardown(VMBus *vmbus,
1996                                  vmbus_message_gpadl_teardown *msg,
1997                                  uint32_t msglen)
1998{
1999    VMBusGpadl *gpadl;
2000
2001    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2002        return;
2003    }
2004
2005    trace_vmbus_gpadl_teardown(msg->gpadl_id);
2006
2007    gpadl = find_gpadl(vmbus, msg->gpadl_id);
2008    if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
2009        return;
2010    }
2011
2012    gpadl->state = VMGPADL_TEARINGDOWN;
2013    vmbus->state = VMBUS_TEARDOWN_GPADL;
2014}
2015
2016static void send_teardown_gpadl(VMBus *vmbus)
2017{
2018    VMBusGpadl *gpadl;
2019
2020    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2021        if (gpadl->state == VMGPADL_TEARINGDOWN) {
2022            struct vmbus_message_gpadl_torndown msg = {
2023                .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
2024                .gpadl_id = gpadl->id,
2025            };
2026
2027            trace_vmbus_gpadl_torndown(gpadl->id);
2028            post_msg(vmbus, &msg, sizeof(msg));
2029            return;
2030        }
2031    }
2032
2033    assert(false);
2034}
2035
2036static bool complete_teardown_gpadl(VMBus *vmbus)
2037{
2038    VMBusGpadl *gpadl;
2039
2040    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2041        if (gpadl->state == VMGPADL_TEARINGDOWN) {
2042            gpadl->state = VMGPADL_TORNDOWN;
2043            vmbus_put_gpadl(gpadl);
2044            return true;
2045        }
2046    }
2047
2048    assert(false);
2049    return false;
2050}
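
/*
 * GPADL lifecycle as implemented by the handlers above:
 *
 *   GPADL_HEADER (+ GPADL_BODYs)    -> VMGPADL_INIT, gfns being collected
 *   all gfns seen, GPADL_CREATED    -> VMGPADL_ALIVE
 *   GPADL_TEARDOWN received         -> VMGPADL_TEARINGDOWN
 *   GPADL_TORNDOWN posted           -> VMGPADL_TORNDOWN, reference dropped
 */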
2051
2052static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
2053                                uint32_t msglen)
2054{
2055    VMBusChannel *chan;
2056
2057    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2058        return;
2059    }
2060
2061    trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
2062                             msg->target_vp);
2063    chan = find_channel(vmbus, msg->child_relid);
2064    if (!chan || chan->state != VMCHAN_INIT) {
2065        return;
2066    }
2067
2068    chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
2069    chan->ringbuf_send_offset = msg->ring_buffer_offset;
2070    chan->target_vp = msg->target_vp;
2071    chan->open_id = msg->open_id;
2072
2073    open_channel(chan);
2074
2075    chan->state = VMCHAN_OPENING;
2076    vmbus->state = VMBUS_OPEN_CHANNEL;
2077}
2078
2079static void send_open_channel(VMBus *vmbus)
2080{
2081    VMBusChannel *chan;
2082
2083    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2084        if (chan->state == VMCHAN_OPENING) {
2085            struct vmbus_message_open_result msg = {
2086                .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
2087                .child_relid = chan->id,
2088                .open_id = chan->open_id,
2089                .status = !vmbus_channel_is_open(chan),
2090            };
2091
2092            trace_vmbus_channel_open(chan->id, msg.status);
2093            post_msg(vmbus, &msg, sizeof(msg));
2094            return;
2095        }
2096    }
2097
2098    assert(false);
2099}
2100
2101static bool complete_open_channel(VMBus *vmbus)
2102{
2103    VMBusChannel *chan;
2104
2105    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2106        if (chan->state == VMCHAN_OPENING) {
2107            if (vmbus_channel_is_open(chan)) {
2108                chan->state = VMCHAN_OPEN;
2109                /*
2110                 * simulate guest notification of ringbuffer space made
2111                 * available, for the channel protocols where the host
2112                 * initiates the communication
2113                 */
2114                vmbus_channel_notify_host(chan);
2115            } else {
2116                chan->state = VMCHAN_INIT;
2117            }
2118            return true;
2119        }
2120    }
2121
2122    assert(false);
2123    return false;
2124}
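
/*
 * Channel open sketch: OPENCHANNEL names the GPADL holding both ring buffers
 * and the offset splitting it into the two rings; open_channel() (defined
 * earlier in this file) tries to bring them up, and the OPENCHANNEL_RESULT
 * status is 0 exactly when vmbus_channel_is_open() reports success.  A
 * freshly opened channel also gets a synthetic host notification so
 * host-initiated protocols start without waiting for the guest to ring
 * first.
 */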
2125
2126static void vdev_reset_on_close(VMBusDevice *vdev)
2127{
2128    uint16_t i;
2129
2130    for (i = 0; i < vdev->num_channels; i++) {
2131        if (vmbus_channel_is_open(&vdev->channels[i])) {
2132            return;
2133        }
2134    }
2135
2136    /* all channels closed -- reset device */
2137    qdev_reset_all(DEVICE(vdev));
2138}
2139
2140static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
2141                                 uint32_t msglen)
2142{
2143    VMBusChannel *chan;
2144
2145    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
2146        return;
2147    }
2148
2149    trace_vmbus_close_channel(msg->child_relid);
2150
2151    chan = find_channel(vmbus, msg->child_relid);
2152    if (!chan) {
2153        return;
2154    }
2155
2156    close_channel(chan);
2157    chan->state = VMCHAN_INIT;
2158
2159    vdev_reset_on_close(chan->dev);
2160}
2161
2162static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
2163{
2164    vmbus->state = VMBUS_UNLOAD;
2165}
2166
2167static void send_unload(VMBus *vmbus)
2168{
2169    vmbus_message_header msg = {
2170        .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
2171    };
2172
2173    qemu_mutex_lock(&vmbus->rx_queue_lock);
2174    vmbus->rx_queue_size = 0;
2175    qemu_mutex_unlock(&vmbus->rx_queue_lock);
2176
2177    post_msg(vmbus, &msg, sizeof(msg));
2178    return;
2179}
2180
2181static bool complete_unload(VMBus *vmbus)
2182{
2183    vmbus_reset_all(vmbus);
2184    return true;
2185}
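
/*
 * Unload sketch: VMBUS_MSG_UNLOAD only flips the state to VMBUS_UNLOAD;
 * send_unload() then drops any still-queued incoming messages and posts
 * UNLOAD_RESPONSE, and complete_unload() resets the whole bus, which ends
 * in vmbus_deinit() putting everything back into VMBUS_LISTEN.
 */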
2186
2187static void process_message(VMBus *vmbus)
2188{
2189    struct hyperv_post_message_input *hv_msg;
2190    struct vmbus_message_header *msg;
2191    void *msgdata;
2192    uint32_t msglen;
2193
2194    qemu_mutex_lock(&vmbus->rx_queue_lock);
2195
2196    if (!vmbus->rx_queue_size) {
2197        goto unlock;
2198    }
2199
2200    hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
2201    msglen = hv_msg->payload_size;
2202    if (msglen < sizeof(*msg)) {
2203        goto out;
2204    }
2205    msgdata = hv_msg->payload;
2206    msg = (struct vmbus_message_header *)msgdata;
2207
2208    trace_vmbus_process_incoming_message(msg->message_type);
2209
2210    switch (msg->message_type) {
2211    case VMBUS_MSG_INITIATE_CONTACT:
2212        handle_initiate_contact(vmbus, msgdata, msglen);
2213        break;
2214    case VMBUS_MSG_REQUESTOFFERS:
2215        handle_request_offers(vmbus, msgdata, msglen);
2216        break;
2217    case VMBUS_MSG_GPADL_HEADER:
2218        handle_gpadl_header(vmbus, msgdata, msglen);
2219        break;
2220    case VMBUS_MSG_GPADL_BODY:
2221        handle_gpadl_body(vmbus, msgdata, msglen);
2222        break;
2223    case VMBUS_MSG_GPADL_TEARDOWN:
2224        handle_gpadl_teardown(vmbus, msgdata, msglen);
2225        break;
2226    case VMBUS_MSG_OPENCHANNEL:
2227        handle_open_channel(vmbus, msgdata, msglen);
2228        break;
2229    case VMBUS_MSG_CLOSECHANNEL:
2230        handle_close_channel(vmbus, msgdata, msglen);
2231        break;
2232    case VMBUS_MSG_UNLOAD:
2233        handle_unload(vmbus, msgdata, msglen);
2234        break;
2235    default:
2236        error_report("unknown message type %#x", msg->message_type);
2237        break;
2238    }
2239
2240out:
2241    vmbus->rx_queue_size--;
2242    vmbus->rx_queue_head++;
2243    vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
2244
2245    vmbus_resched(vmbus);
2246unlock:
2247    qemu_mutex_unlock(&vmbus->rx_queue_lock);
2248}
2249
2250static const struct {
2251    void (*run)(VMBus *vmbus);
2252    bool (*complete)(VMBus *vmbus);
2253} state_runner[] = {
2254    [VMBUS_LISTEN]         = {process_message,     NULL},
2255    [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
2256    [VMBUS_OFFER]          = {send_offer,          complete_offer},
2257    [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
2258    [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
2259    [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
2260    [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
2261};
2262
2263static void vmbus_do_run(VMBus *vmbus)
2264{
2265    if (vmbus->msg_in_progress) {
2266        return;
2267    }
2268
2269    assert(vmbus->state < VMBUS_STATE_MAX);
2270    assert(state_runner[vmbus->state].run);
2271    state_runner[vmbus->state].run(vmbus);
2272}
2273
2274static void vmbus_run(void *opaque)
2275{
2276    VMBus *vmbus = opaque;
2277
2278    /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
2279    if (vmbus->in_progress) {
2280        return;
2281    }
2282
2283    vmbus->in_progress = true;
2284    /*
2285     * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
2286     * should go *after* the code that can result in aio_poll; otherwise
2287     * reschedules can be missed.  No idea how to enforce that.
2288     */
2289    vmbus_do_run(vmbus);
2290    vmbus->in_progress = false;
2291}
2292
2293static void vmbus_msg_cb(void *data, int status)
2294{
2295    VMBus *vmbus = data;
2296    bool (*complete)(VMBus *vmbus);
2297
2298    assert(vmbus->msg_in_progress);
2299
2300    trace_vmbus_msg_cb(status);
2301
2302    if (status == -EAGAIN) {
2303        goto out;
2304    }
2305    if (status) {
2306        error_report("message delivery fatal failure: %d; aborting vmbus",
2307                     status);
2308        vmbus_reset_all(vmbus);
2309        return;
2310    }
2311
2312    assert(vmbus->state < VMBUS_STATE_MAX);
2313    complete = state_runner[vmbus->state].complete;
2314    if (!complete || complete(vmbus)) {
2315        vmbus->state = VMBUS_LISTEN;
2316    }
2317out:
2318    vmbus->msg_in_progress = false;
2319    vmbus_resched(vmbus);
2320}
2321
2322static void vmbus_resched(VMBus *vmbus)
2323{
2324    aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
2325}
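
/*
 * Overall message cycle (illustrative): everything is driven from main-loop
 * bottom halves scheduled by vmbus_resched(), roughly
 *
 *     vmbus_run()
 *       -> vmbus_do_run()
 *            -> state_runner[state].run()        // e.g. send_offer()
 *     ... delivery completion ...
 *       -> vmbus_msg_cb()
 *            -> state_runner[state].complete()   // e.g. complete_offer()
 *            -> state = VMBUS_LISTEN if complete() is absent or returns true
 *            -> vmbus_resched()
 *
 * -EAGAIN in vmbus_msg_cb() skips the completion step, so the same state's
 * run() is retried on the next bottom half.
 */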
2326
2327static void vmbus_signal_event(EventNotifier *e)
2328{
2329    VMBusChannel *chan;
2330    VMBus *vmbus = container_of(e, VMBus, notifier);
2331    unsigned long *int_map;
2332    hwaddr addr, len;
2333    bool is_dirty = false;
2334
2335    if (!event_notifier_test_and_clear(e)) {
2336        return;
2337    }
2338
2339    trace_vmbus_signal_event();
2340
2341    if (!vmbus->int_page_gpa) {
2342        return;
2343    }
2344
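    /*
     * Only the second half of the guest-supplied interrupt page is scanned
     * here: it holds the per-channel guest-to-host interrupt bitmap indexed
     * by child relid (the first half conventionally carries the
     * host-to-guest flags and is not touched on this path).
     */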
2345    addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
2346    len = TARGET_PAGE_SIZE / 2;
2347    int_map = cpu_physical_memory_map(addr, &len, 1);
2348    if (len != TARGET_PAGE_SIZE / 2) {
2349        goto unmap;
2350    }
2351
2352    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2353        if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
2354            if (!vmbus_channel_is_open(chan)) {
2355                continue;
2356            }
2357            vmbus_channel_notify_host(chan);
2358            is_dirty = true;
2359        }
2360    }
2361
2362unmap:
2363    cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
2364}
2365
2366static void vmbus_dev_realize(DeviceState *dev, Error **errp)
2367{
2368    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2369    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2370    VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
2371    BusChild *child;
2372    Error *err = NULL;
2373    char idstr[UUID_FMT_LEN + 1];
2374
2375    assert(!qemu_uuid_is_null(&vdev->instanceid));
2376
2377    if (!qemu_uuid_is_null(&vdc->instanceid)) {
2378        /* Class wants to only have a single instance with a fixed UUID */
2379        if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
2380            error_setg(&err, "instance id can't be changed");
2381            goto error_out;
2382        }
2383    }
2384
2385    /* Check for instance id collision for this class id */
2386    QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
2387        VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
2388
2389        if (child_dev == vdev) {
2390            continue;
2391        }
2392
2393        if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
2394            qemu_uuid_unparse(&vdev->instanceid, idstr);
2395            error_setg(&err, "duplicate vmbus device instance id %s", idstr);
2396            goto error_out;
2397        }
2398    }
2399
2400    vdev->dma_as = &address_space_memory;
2401
2402    create_channels(vmbus, vdev, &err);
2403    if (err) {
2404        goto error_out;
2405    }
2406
2407    if (vdc->vmdev_realize) {
2408        vdc->vmdev_realize(vdev, &err);
2409        if (err) {
2410            goto err_vdc_realize;
2411        }
2412    }
2413    return;
2414
2415err_vdc_realize:
2416    free_channels(vdev);
2417error_out:
2418    error_propagate(errp, err);
2419}
2420
2421static void vmbus_dev_reset(DeviceState *dev)
2422{
2423    uint16_t i;
2424    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2425    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2426
2427    if (vdev->channels) {
2428        for (i = 0; i < vdev->num_channels; i++) {
2429            VMBusChannel *chan = &vdev->channels[i];
2430            close_channel(chan);
2431            chan->state = VMCHAN_INIT;
2432        }
2433    }
2434
2435    if (vdc->vmdev_reset) {
2436        vdc->vmdev_reset(vdev);
2437    }
2438}
2439
2440static void vmbus_dev_unrealize(DeviceState *dev)
2441{
2442    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2443    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2444
2445    if (vdc->vmdev_unrealize) {
2446        vdc->vmdev_unrealize(vdev);
2447    }
2448    free_channels(vdev);
2449}
2450
2451static Property vmbus_dev_props[] = {
2452    DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
2453    DEFINE_PROP_END_OF_LIST()
2454};
2455
2456
2457static void vmbus_dev_class_init(ObjectClass *klass, void *data)
2458{
2459    DeviceClass *kdev = DEVICE_CLASS(klass);
2460    device_class_set_props(kdev, vmbus_dev_props);
2461    kdev->bus_type = TYPE_VMBUS;
2462    kdev->realize = vmbus_dev_realize;
2463    kdev->unrealize = vmbus_dev_unrealize;
2464    kdev->reset = vmbus_dev_reset;
2465}
2466
2467static void vmbus_dev_instance_init(Object *obj)
2468{
2469    VMBusDevice *vdev = VMBUS_DEVICE(obj);
2470    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
2471
2472    if (!qemu_uuid_is_null(&vdc->instanceid)) {
2473        /* Class wants to only have a single instance with a fixed UUID */
2474        vdev->instanceid = vdc->instanceid;
2475    }
2476}
2477
2478const VMStateDescription vmstate_vmbus_dev = {
2479    .name = TYPE_VMBUS_DEVICE,
2480    .version_id = 0,
2481    .minimum_version_id = 0,
2482    .fields = (VMStateField[]) {
2483        VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
2484        VMSTATE_UINT16(num_channels, VMBusDevice),
2485        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
2486                                             num_channels, vmstate_channel,
2487                                             VMBusChannel),
2488        VMSTATE_END_OF_LIST()
2489    }
2490};
2491
2492/* vmbus generic device base */
2493static const TypeInfo vmbus_dev_type_info = {
2494    .name = TYPE_VMBUS_DEVICE,
2495    .parent = TYPE_DEVICE,
2496    .abstract = true,
2497    .instance_size = sizeof(VMBusDevice),
2498    .class_size = sizeof(VMBusDeviceClass),
2499    .class_init = vmbus_dev_class_init,
2500    .instance_init = vmbus_dev_instance_init,
2501};
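
/*
 * Sketch of a concrete device built on top of TYPE_VMBUS_DEVICE (names and
 * callbacks below are hypothetical; only the VMBusDeviceClass fields used
 * elsewhere in this file are real, and the VMBUS_DEVICE_CLASS() cast is
 * assumed to be generated alongside VMBUS_DEVICE_GET_CLASS()):
 *
 *     static void foo_vmdev_class_init(ObjectClass *klass, void *data)
 *     {
 *         VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
 *
 *         vdc->classid = foo_classid;          // offered as type_uuid
 *         vdc->channel_flags = 0;
 *         vdc->mmio_size_mb = 0;
 *         vdc->vmdev_realize = foo_vmdev_realize;
 *         vdc->vmdev_unrealize = foo_vmdev_unrealize;
 *         vdc->vmdev_reset = foo_vmdev_reset;
 *     }
 *
 * registered with a TypeInfo whose .parent is TYPE_VMBUS_DEVICE, so
 * vmbus_dev_realize() and friends above run as part of the normal device
 * lifecycle.
 */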
2502
2503static void vmbus_realize(BusState *bus, Error **errp)
2504{
2505    int ret = 0;
2506    Error *local_err = NULL;
2507    VMBus *vmbus = VMBUS(bus);
2508
2509    qemu_mutex_init(&vmbus->rx_queue_lock);
2510
2511    QTAILQ_INIT(&vmbus->gpadl_list);
2512    QTAILQ_INIT(&vmbus->channel_list);
2513
2514    ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
2515                                 vmbus_recv_message, vmbus);
2516    if (ret != 0) {
2517        error_setg(&local_err, "hyperv set message handler failed: %d", ret);
2518        goto error_out;
2519    }
2520
2521    ret = event_notifier_init(&vmbus->notifier, 0);
2522    if (ret != 0) {
2523        error_setg(&local_err, "event notifier failed to init with %d", ret);
2524        goto remove_msg_handler;
2525    }
2526
2527    event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
2528    ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
2529                                        &vmbus->notifier);
2530    if (ret != 0) {
2531        error_setg(&local_err, "hyperv set event handler failed with %d", ret);
2532        goto clear_event_notifier;
2533    }
2534
2535    return;
2536
2537clear_event_notifier:
2538    event_notifier_cleanup(&vmbus->notifier);
2539remove_msg_handler:
2540    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2541error_out:
2542    qemu_mutex_destroy(&vmbus->rx_queue_lock);
2543    error_propagate(errp, local_err);
2544}
2545
2546static void vmbus_unrealize(BusState *bus)
2547{
2548    VMBus *vmbus = VMBUS(bus);
2549
2550    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
2551    hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
2552    event_notifier_cleanup(&vmbus->notifier);
2553
2554    qemu_mutex_destroy(&vmbus->rx_queue_lock);
2555}
2556
2557static void vmbus_reset(BusState *bus)
2558{
2559    vmbus_deinit(VMBUS(bus));
2560}
2561
2562static char *vmbus_get_dev_path(DeviceState *dev)
2563{
2564    BusState *bus = qdev_get_parent_bus(dev);
2565    return qdev_get_dev_path(bus->parent);
2566}
2567
2568static char *vmbus_get_fw_dev_path(DeviceState *dev)
2569{
2570    VMBusDevice *vdev = VMBUS_DEVICE(dev);
2571    char uuid[UUID_FMT_LEN + 1];
2572
2573    qemu_uuid_unparse(&vdev->instanceid, uuid);
2574    return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
2575}
2576
2577static void vmbus_class_init(ObjectClass *klass, void *data)
2578{
2579    BusClass *k = BUS_CLASS(klass);
2580
2581    k->get_dev_path = vmbus_get_dev_path;
2582    k->get_fw_dev_path = vmbus_get_fw_dev_path;
2583    k->realize = vmbus_realize;
2584    k->unrealize = vmbus_unrealize;
2585    k->reset = vmbus_reset;
2586}
2587
2588static int vmbus_pre_load(void *opaque)
2589{
2590    VMBusChannel *chan;
2591    VMBus *vmbus = VMBUS(opaque);
2592
2593    /*
2594     * channel IDs allocated by the source will come in the migration stream
2595     * for each channel, so clean up the ones allocated at realize
2596     */
2597    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2598        unregister_chan_id(chan);
2599    }
2600
2601    return 0;
2602}
2603static int vmbus_post_load(void *opaque, int version_id)
2604{
2605    int ret;
2606    VMBus *vmbus = VMBUS(opaque);
2607    VMBusGpadl *gpadl;
2608    VMBusChannel *chan;
2609
2610    ret = vmbus_init(vmbus);
2611    if (ret) {
2612        return ret;
2613    }
2614
2615    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
2616        gpadl->vmbus = vmbus;
2617        gpadl->refcount = 1;
2618    }
2619
2620    /*
2621     * reopening channels depends on initialized vmbus so it's done here
2622     * instead of channel_post_load()
2623     */
2624    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
2625
2626        if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
2627            open_channel(chan);
2628        }
2629
2630        if (chan->state != VMCHAN_OPEN) {
2631            continue;
2632        }
2633
2634        if (!vmbus_channel_is_open(chan)) {
2635            /* reopen failed, abort loading */
2636            return -1;
2637        }
2638
2639        /* resume processing on the guest side if it missed the notification */
2640        hyperv_sint_route_set_sint(chan->notify_route);
2641        /* ditto on the host side */
2642        vmbus_channel_notify_host(chan);
2643    }
2644
2645    vmbus_resched(vmbus);
2646    return 0;
2647}
2648
2649static const VMStateDescription vmstate_post_message_input = {
2650    .name = "vmbus/hyperv_post_message_input",
2651    .version_id = 0,
2652    .minimum_version_id = 0,
2653    .fields = (VMStateField[]) {
2654        /*
2655         * skip connection_id and message_type as they are validated before
2656         * queueing and ignored on dequeueing
2657         */
2658        VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
2659        VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
2660                            HV_MESSAGE_PAYLOAD_SIZE),
2661        VMSTATE_END_OF_LIST()
2662    }
2663};
2664
2665static bool vmbus_rx_queue_needed(void *opaque)
2666{
2667    VMBus *vmbus = VMBUS(opaque);
2668    return vmbus->rx_queue_size;
2669}
2670
2671static const VMStateDescription vmstate_rx_queue = {
2672    .name = "vmbus/rx_queue",
2673    .version_id = 0,
2674    .minimum_version_id = 0,
2675    .needed = vmbus_rx_queue_needed,
2676    .fields = (VMStateField[]) {
2677        VMSTATE_UINT8(rx_queue_head, VMBus),
2678        VMSTATE_UINT8(rx_queue_size, VMBus),
2679        VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
2680                             HV_MSG_QUEUE_LEN, 0,
2681                             vmstate_post_message_input,
2682                             struct hyperv_post_message_input),
2683        VMSTATE_END_OF_LIST()
2684    }
2685};
2686
2687static const VMStateDescription vmstate_vmbus = {
2688    .name = TYPE_VMBUS,
2689    .version_id = 0,
2690    .minimum_version_id = 0,
2691    .pre_load = vmbus_pre_load,
2692    .post_load = vmbus_post_load,
2693    .fields = (VMStateField[]) {
2694        VMSTATE_UINT8(state, VMBus),
2695        VMSTATE_UINT32(version, VMBus),
2696        VMSTATE_UINT32(target_vp, VMBus),
2697        VMSTATE_UINT64(int_page_gpa, VMBus),
2698        VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
2699                         vmstate_gpadl, VMBusGpadl, link),
2700        VMSTATE_END_OF_LIST()
2701    },
2702    .subsections = (const VMStateDescription * []) {
2703        &vmstate_rx_queue,
2704        NULL
2705    }
2706};
2707
2708static const TypeInfo vmbus_type_info = {
2709    .name = TYPE_VMBUS,
2710    .parent = TYPE_BUS,
2711    .instance_size = sizeof(VMBus),
2712    .class_init = vmbus_class_init,
2713};
2714
2715static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
2716{
2717    VMBusBridge *bridge = VMBUS_BRIDGE(dev);
2718
2719    /*
2720     * at this point at least one vmbus bridge is being realized, so
2721     * vmbus_bridge_find() can only return NULL if the bridge is not unique
2722     */
2723    if (!vmbus_bridge_find()) {
2724        error_setg(errp, "there can be at most one %s in the system",
2725                   TYPE_VMBUS_BRIDGE);
2726        return;
2727    }
2728
2729    if (!hyperv_is_synic_enabled()) {
2730        error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
2731        return;
2732    }
2733
2734    bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
2735}
2736
2737static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
2738{
2739    /* there can be only one VMBus */
2740    return g_strdup("0");
2741}
2742
2743static const VMStateDescription vmstate_vmbus_bridge = {
2744    .name = TYPE_VMBUS_BRIDGE,
2745    .version_id = 0,
2746    .minimum_version_id = 0,
2747    .fields = (VMStateField[]) {
2748        VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
2749        VMSTATE_END_OF_LIST()
2750    },
2751};
2752
2753static Property vmbus_bridge_props[] = {
2754    DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
2755    DEFINE_PROP_END_OF_LIST()
2756};
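
/*
 * Typical command-line wiring (a sketch; the guest device name and UUID are
 * hypothetical, and the SynIC/VP_INDEX enlightenments are assumed to be
 * enabled via the usual hv-* cpu flags):
 *
 *     -cpu host,hv-vpindex=on,hv-synic=on \
 *     -device vmbus-bridge,irq=7 \
 *     -device some-vmbus-dev,instanceid=11111111-2222-3333-4444-555555555555
 *
 * The bridge creates the single "vmbus" bus that all TYPE_VMBUS_DEVICE
 * instances plug into.
 */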
2757
2758static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
2759{
2760    DeviceClass *k = DEVICE_CLASS(klass);
2761    SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
2762
2763    k->realize = vmbus_bridge_realize;
2764    k->fw_name = "vmbus";
2765    sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
2766    set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
2767    k->vmsd = &vmstate_vmbus_bridge;
2768    device_class_set_props(k, vmbus_bridge_props);
2769    /* override SysBusDevice's default */
2770    k->user_creatable = true;
2771}
2772
2773static const TypeInfo vmbus_bridge_type_info = {
2774    .name = TYPE_VMBUS_BRIDGE,
2775    .parent = TYPE_SYS_BUS_DEVICE,
2776    .instance_size = sizeof(VMBusBridge),
2777    .class_init = vmbus_bridge_class_init,
2778};
2779
2780static void vmbus_register_types(void)
2781{
2782    type_register_static(&vmbus_bridge_type_info);
2783    type_register_static(&vmbus_dev_type_info);
2784    type_register_static(&vmbus_type_info);
2785}
2786
2787type_init(vmbus_register_types)
2788