qemu/contrib/libvhost-user/libvhost-user.c
   1/*
   2 * Vhost User library
   3 *
   4 * Copyright IBM, Corp. 2007
   5 * Copyright (c) 2016 Red Hat, Inc.
   6 *
   7 * Authors:
   8 *  Anthony Liguori <aliguori@us.ibm.com>
   9 *  Marc-André Lureau <mlureau@redhat.com>
  10 *  Victor Kaplansky <victork@redhat.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or
  13 * later.  See the COPYING file in the top-level directory.
  14 */
  15
  16#include "qemu/osdep.h"
  17#include <sys/eventfd.h>
  18#include <linux/vhost.h>
  19
  20#include "qemu/atomic.h"
  21
  22#include "libvhost-user.h"
  23
  24#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
  25
  26/* The version of the protocol we support */
  27#define VHOST_USER_VERSION 1
  28#define LIBVHOST_USER_DEBUG 0
  29
  30#define DPRINT(...)                             \
  31    do {                                        \
  32        if (LIBVHOST_USER_DEBUG) {              \
  33            fprintf(stderr, __VA_ARGS__);        \
  34        }                                       \
  35    } while (0)
  36
  37static const char *
  38vu_request_to_string(int req)
  39{
  40#define REQ(req) [req] = #req
  41    static const char *vu_request_str[] = {
  42        REQ(VHOST_USER_NONE),
  43        REQ(VHOST_USER_GET_FEATURES),
  44        REQ(VHOST_USER_SET_FEATURES),
  48        REQ(VHOST_USER_SET_OWNER),
  49        REQ(VHOST_USER_RESET_OWNER),
  50        REQ(VHOST_USER_SET_MEM_TABLE),
  51        REQ(VHOST_USER_SET_LOG_BASE),
  52        REQ(VHOST_USER_SET_LOG_FD),
  53        REQ(VHOST_USER_SET_VRING_NUM),
  54        REQ(VHOST_USER_SET_VRING_ADDR),
  55        REQ(VHOST_USER_SET_VRING_BASE),
  56        REQ(VHOST_USER_GET_VRING_BASE),
  57        REQ(VHOST_USER_SET_VRING_KICK),
  58        REQ(VHOST_USER_SET_VRING_CALL),
  59        REQ(VHOST_USER_SET_VRING_ERR),
  60        REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
  61        REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
  62        REQ(VHOST_USER_GET_QUEUE_NUM),
  63        REQ(VHOST_USER_SET_VRING_ENABLE),
  64        REQ(VHOST_USER_SEND_RARP),
  65        REQ(VHOST_USER_INPUT_GET_CONFIG),
  66        REQ(VHOST_USER_MAX),
  67    };
  68#undef REQ
  69
  70    if (req < VHOST_USER_MAX) {
  71        return vu_request_str[req];
  72    } else {
  73        return "unknown";
  74    }
  75}
  76
  77static void
  78vu_panic(VuDev *dev, const char *msg, ...)
  79{
  80    char *buf = NULL;
  81    va_list ap;
  82
  83    va_start(ap, msg);
  84    buf = g_strdup_vprintf(msg, ap);
  85    va_end(ap);
  86
  87    dev->broken = true;
  88    dev->panic(dev, buf);
  89    g_free(buf);   /* allocated by g_strdup_vprintf() */
  90
  91    /* FIXME: find a way to call virtio_error? */
  92}
  93
  94/* Translate guest physical address to our virtual address.  */
  95void *
  96vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
  97{
  98    int i;
  99
 100    /* Find matching memory region.  */
 101    for (i = 0; i < dev->nregions; i++) {
 102        VuDevRegion *r = &dev->regions[i];
 103
 104        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
 105            return (void *)(uintptr_t)
 106                guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
 107        }
 108    }
 109
 110    return NULL;
 111}
 112
 113/* Translate qemu virtual address to our virtual address.  */
 114static void *
 115qva_to_va(VuDev *dev, uint64_t qemu_addr)
 116{
 117    int i;
 118
 119    /* Find matching memory region.  */
 120    for (i = 0; i < dev->nregions; i++) {
 121        VuDevRegion *r = &dev->regions[i];
 122
 123        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
 124            return (void *)(uintptr_t)
 125                qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
 126        }
 127    }
 128
 129    return NULL;
 130}
 131
 132static void
 133vmsg_close_fds(VhostUserMsg *vmsg)
 134{
 135    int i;
 136
 137    for (i = 0; i < vmsg->fd_num; i++) {
 138        close(vmsg->fds[i]);
 139    }
 140}
 141
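/* Read a single vhost-user message from the socket, including any file
 * descriptors passed via SCM_RIGHTS ancillary data.  On failure the
 * device is marked broken and false is returned. */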
 142static bool
 143vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
 144{
 145    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
 146    struct iovec iov = {
 147        .iov_base = (char *)vmsg,
 148        .iov_len = VHOST_USER_HDR_SIZE,
 149    };
 150    struct msghdr msg = {
 151        .msg_iov = &iov,
 152        .msg_iovlen = 1,
 153        .msg_control = control,
 154        .msg_controllen = sizeof(control),
 155    };
 156    size_t fd_size;
 157    struct cmsghdr *cmsg;
 158    int rc;
 159
 160    do {
 161        rc = recvmsg(conn_fd, &msg, 0);
 162    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
 163
 164    if (rc < 0) {
 165        vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
 166        return false;
 167    }
 168
 169    vmsg->fd_num = 0;
 170    for (cmsg = CMSG_FIRSTHDR(&msg);
 171         cmsg != NULL;
 172         cmsg = CMSG_NXTHDR(&msg, cmsg))
 173    {
 174        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
 175            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
 176            vmsg->fd_num = fd_size / sizeof(int);
 177            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
 178            break;
 179        }
 180    }
 181
 182    if (vmsg->size > sizeof(vmsg->payload)) {
 183        vu_panic(dev,
 184                 "Error: too big message request: %d, size: vmsg->size: %u, "
 185                 "while sizeof(vmsg->payload) = %zu\n",
 186                 vmsg->request, vmsg->size, sizeof(vmsg->payload));
 187        goto fail;
 188    }
 189
 190    if (vmsg->size) {
 191        do {
 192            rc = read(conn_fd, &vmsg->payload, vmsg->size);
 193        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
 194
 195        if (rc <= 0) {
 196            vu_panic(dev, "Error while reading: %s", strerror(errno));
 197            goto fail;
 198        }
 199
 200        assert(rc == vmsg->size);
 201    }
 202
 203    return true;
 204
 205fail:
 206    vmsg_close_fds(vmsg);
 207
 208    return false;
 209}
 210
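/* Send a reply on the socket: the request header is reused with the
 * version and reply flags set, followed by the payload. */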
 211static bool
 212vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
 213{
 214    int rc;
 215    uint8_t *p = (uint8_t *)vmsg;
 216
 217    /* Set the version in the flags when sending the reply */
 218    vmsg->flags &= ~VHOST_USER_VERSION_MASK;
 219    vmsg->flags |= VHOST_USER_VERSION;
 220    vmsg->flags |= VHOST_USER_REPLY_MASK;
 221
 222    do {
 223        rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
 224    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
 225
 226    do {
 227        if (vmsg->data) {
 228            rc = write(conn_fd, vmsg->data, vmsg->size);
 229        } else {
 230            rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
 231        }
 232    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
 233
 234    if (rc <= 0) {
 235        vu_panic(dev, "Error while writing: %s", strerror(errno));
 236        return false;
 237    }
 238
 239    return true;
 240}
 241
 242/* Kick the log_call_fd if required. */
 243static void
 244vu_log_kick(VuDev *dev)
 245{
 246    if (dev->log_call_fd != -1) {
 247        DPRINT("Kicking the QEMU's log...\n");
 248        if (eventfd_write(dev->log_call_fd, 1) < 0) {
 249            vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
 250        }
 251    }
 252}
 253
 254static void
 255vu_log_page(uint8_t *log_table, uint64_t page)
 256{
 257    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
 258    atomic_or(&log_table[page / 8], 1 << (page % 8));
 259}
 260
 261static void
 262vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
 263{
 264    uint64_t page;
 265
 266    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
 267        !dev->log_table || !length) {
 268        return;
 269    }
 270
 271    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
 272
 273    page = address / VHOST_LOG_PAGE;
 274    while (page * VHOST_LOG_PAGE < address + length) {
 275        vu_log_page(dev->log_table, page);
 276        page += 1;   /* advance by one page index, not one page size */
 277    }
 278
 279    vu_log_kick(dev);
 280}
 281
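/* I/O watch callback for a queue's kick fd: drain the eventfd and run
 * the handler registered with vu_set_queue_handler(), if any. */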
 282static void
 283vu_kick_cb(VuDev *dev, int condition, void *data)
 284{
 285    int index = (intptr_t)data;
 286    VuVirtq *vq = &dev->vq[index];
 287    int sock = vq->kick_fd;
 288    eventfd_t kick_data;
 289    ssize_t rc;
 290
 291    rc = eventfd_read(sock, &kick_data);
 292    if (rc == -1) {
 293        vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
 294        dev->remove_watch(dev, dev->vq[index].kick_fd);
 295    } else {
 296        DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
 297               kick_data, vq->handler, index);
 298        if (vq->handler) {
 299            vq->handler(dev, index);
 300        }
 301    }
 302}
 303
 304static bool
 305vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
 306{
 307    vmsg->payload.u64 =
 308        1ULL << VHOST_F_LOG_ALL |
 309        1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
 310
 311    if (dev->iface->get_features) {
 312        vmsg->payload.u64 |= dev->iface->get_features(dev);
 313    }
 314
 315    vmsg->size = sizeof(vmsg->payload.u64);
 316
 317    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 318
 319    return true;
 320}
 321
 322static void
 323vu_set_enable_all_rings(VuDev *dev, bool enabled)
 324{
 325    int i;
 326
 327    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
 328        dev->vq[i].enable = enabled;
 329    }
 330}
 331
 332static bool
 333vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
 334{
 335    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 336
 337    dev->features = vmsg->payload.u64;
 338
 339    if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
 340        vu_set_enable_all_rings(dev, true);
 341    }
 342
 343    if (dev->iface->set_features) {
 344        dev->iface->set_features(dev, dev->features);
 345    }
 346
 347    return false;
 348}
 349
 350static bool
 351vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
 352{
 353    return false;
 354}
 355
 356static void
 357vu_close_log(VuDev *dev)
 358{
 359    if (dev->log_table) {
 360        if (munmap(dev->log_table, dev->log_size) != 0) {
 361            perror("close log munmap() error");
 362        }
 363
 364        dev->log_table = NULL;
 365    }
 366    if (dev->log_call_fd != -1) {
 367        close(dev->log_call_fd);
 368        dev->log_call_fd = -1;
 369    }
 370}
 371
 372static bool
 373vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
 374{
 375    vu_set_enable_all_rings(dev, false);
 376
 377    return false;
 378}
 379
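/* Map the guest memory regions announced by the master.  Note that any
 * previous mappings are simply overwritten here, not unmapped. */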
 380static bool
 381vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
 382{
 383    int i;
 384    VhostUserMemory *memory = &vmsg->payload.memory;
 385    dev->nregions = memory->nregions;
 386
 387    DPRINT("Nregions: %d\n", memory->nregions);
 388    for (i = 0; i < dev->nregions; i++) {
 389        void *mmap_addr;
 390        VhostUserMemoryRegion *msg_region = &memory->regions[i];
 391        VuDevRegion *dev_region = &dev->regions[i];
 392
 393        DPRINT("Region %d\n", i);
 394        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
 395               msg_region->guest_phys_addr);
 396        DPRINT("    memory_size:     0x%016"PRIx64"\n",
 397               msg_region->memory_size);
 398        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
 399               msg_region->userspace_addr);
 400        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
 401               msg_region->mmap_offset);
 402
 403        dev_region->gpa = msg_region->guest_phys_addr;
 404        dev_region->size = msg_region->memory_size;
 405        dev_region->qva = msg_region->userspace_addr;
 406        dev_region->mmap_offset = msg_region->mmap_offset;
 407
 408        /* We don't use offset argument of mmap() since the
 409         * mapped address has to be page aligned, and we use huge
 410         * pages.  */
 411        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
 412                         PROT_READ | PROT_WRITE, MAP_SHARED,
 413                         vmsg->fds[i], 0);
 414
 415        if (mmap_addr == MAP_FAILED) {
 416            vu_panic(dev, "region mmap error: %s", strerror(errno));
 417        } else {
 418            dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
 419            DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
 420                   dev_region->mmap_addr);
 421        }
 422
 423        close(vmsg->fds[i]);
 424    }
 425
 426    return false;
 427}
 428
 429static bool
 430vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
 431{
 432    int fd;
 433    uint64_t log_mmap_size, log_mmap_offset;
 434    void *rc;
 435
 436    if (vmsg->fd_num != 1 ||
 437        vmsg->size != sizeof(vmsg->payload.log)) {
 438        vu_panic(dev, "Invalid log_base message");
 439        return true;
 440    }
 441
 442    fd = vmsg->fds[0];
 443    log_mmap_offset = vmsg->payload.log.mmap_offset;
 444    log_mmap_size = vmsg->payload.log.mmap_size;
 445    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
 446    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
 447
 448    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
 449              log_mmap_offset);
 450    if (rc == MAP_FAILED) {
 451        perror("log mmap error");
 452    }
 453    dev->log_table = rc;
 454    dev->log_size = log_mmap_size;
 455
 456    vmsg->size = sizeof(vmsg->payload.u64);
 457
 458    return true;
 459}
 460
 461static bool
 462vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
 463{
 464    if (vmsg->fd_num != 1) {
 465        vu_panic(dev, "Invalid log_fd message");
 466        return false;
 467    }
 468
 469    if (dev->log_call_fd != -1) {
 470        close(dev->log_call_fd);
 471    }
 472    dev->log_call_fd = vmsg->fds[0];
 473    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
 474
 475    return false;
 476}
 477
 478static bool
 479vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
 480{
 481    unsigned int index = vmsg->payload.state.index;
 482    unsigned int num = vmsg->payload.state.num;
 483
 484    DPRINT("State.index: %d\n", index);
 485    DPRINT("State.num:   %d\n", num);
 486    dev->vq[index].vring.num = num;
 487
 488    return false;
 489}
 490
 491static bool
 492vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
 493{
 494    struct vhost_vring_addr *vra = &vmsg->payload.addr;
 495    unsigned int index = vra->index;
 496    VuVirtq *vq = &dev->vq[index];
 497
 498    DPRINT("vhost_vring_addr:\n");
 499    DPRINT("    index:  %d\n", vra->index);
 500    DPRINT("    flags:  %d\n", vra->flags);
 501    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
 502    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
 503    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
 504    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
 505
 506    vq->vring.flags = vra->flags;
 507    vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
 508    vq->vring.used = qva_to_va(dev, vra->used_user_addr);
 509    vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
 510    vq->vring.log_guest_addr = vra->log_guest_addr;
 511
 512    DPRINT("Setting virtq addresses:\n");
 513    DPRINT("    vring_desc  at %p\n", vq->vring.desc);
 514    DPRINT("    vring_used  at %p\n", vq->vring.used);
 515    DPRINT("    vring_avail at %p\n", vq->vring.avail);
 516
 517    if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
 518        vu_panic(dev, "Invalid vring_addr message");
 519        return false;
 520    }
 521
 522    vq->used_idx = vq->vring.used->idx;
 523
 524    return false;
 525}
 526
 527static bool
 528vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
 529{
 530    unsigned int index = vmsg->payload.state.index;
 531    unsigned int num = vmsg->payload.state.num;
 532
 533    DPRINT("State.index: %d\n", index);
 534    DPRINT("State.num:   %d\n", num);
 535    dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
 536
 537    return false;
 538}
 539
 540static bool
 541vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
 542{
 543    unsigned int index = vmsg->payload.state.index;
 544
 545    DPRINT("State.index: %d\n", index);
 546    vmsg->payload.state.num = dev->vq[index].last_avail_idx;
 547    vmsg->size = sizeof(vmsg->payload.state);
 548
 549    dev->vq[index].started = false;
 550    if (dev->iface->queue_set_started) {
 551        dev->iface->queue_set_started(dev, index, false);
 552    }
 553
 554    if (dev->vq[index].call_fd != -1) {
 555        close(dev->vq[index].call_fd);
 556        dev->vq[index].call_fd = -1;
 557    }
 558    if (dev->vq[index].kick_fd != -1) {
 559        dev->remove_watch(dev, dev->vq[index].kick_fd);
 560        close(dev->vq[index].kick_fd);
 561        dev->vq[index].kick_fd = -1;
 562    }
 563
 564    return true;
 565}
 566
 567static bool
 568vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
 569{
 570    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 571
 572    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
 573        vmsg_close_fds(vmsg);
 574        vu_panic(dev, "Invalid queue index: %u", index);
 575        return false;
 576    }
 577
 578    if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
 579        vmsg->fd_num != 1) {
 580        vmsg_close_fds(vmsg);
 581        vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
 582        return false;
 583    }
 584
 585    return true;
 586}
 587
 588static bool
 589vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
 590{
 591    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 592
 593    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 594
 595    if (!vu_check_queue_msg_file(dev, vmsg)) {
 596        return false;
 597    }
 598
 599    if (dev->vq[index].kick_fd != -1) {
 600        dev->remove_watch(dev, dev->vq[index].kick_fd);
 601        close(dev->vq[index].kick_fd);
 602        dev->vq[index].kick_fd = -1;
 603    }
 604
 605    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
 606        dev->vq[index].kick_fd = vmsg->fds[0];
 607        DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
 608    }
 609
 610    dev->vq[index].started = true;
 611    if (dev->iface->queue_set_started) {
 612        dev->iface->queue_set_started(dev, index, true);
 613    }
 614
 615    if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
 616        dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
 617                       vu_kick_cb, (void *)(long)index);
 618
 619        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
 620               dev->vq[index].kick_fd, index);
 621    }
 622
 623    return false;
 624}
 625
 626void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
 627                          vu_queue_handler_cb handler)
 628{
 629    int qidx = vq - dev->vq;
 630
 631    vq->handler = handler;
 632    if (vq->kick_fd >= 0) {
 633        if (handler) {
 634            dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
 635                           vu_kick_cb, (void *)(long)qidx);
 636        } else {
 637            dev->remove_watch(dev, vq->kick_fd);
 638        }
 639    }
 640}
 641
 642static bool
 643vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
 644{
 645    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 646
 647    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 648
 649    if (!vu_check_queue_msg_file(dev, vmsg)) {
 650        return false;
 651    }
 652
 653    if (dev->vq[index].call_fd != -1) {
 654        close(dev->vq[index].call_fd);
 655        dev->vq[index].call_fd = -1;
 656    }
 657
 658    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
 659        dev->vq[index].call_fd = vmsg->fds[0];
 660    }
 661
 662    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
 663
 664    return false;
 665}
 666
 667static bool
 668vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
 669{
 670    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 671
 672    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
 673
 674    if (!vu_check_queue_msg_file(dev, vmsg)) {
 675        return false;
 676    }
 677
 678    if (dev->vq[index].err_fd != -1) {
 679        close(dev->vq[index].err_fd);
 680        dev->vq[index].err_fd = -1;
 681    }
 682
 683    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
 684        dev->vq[index].err_fd = vmsg->fds[0];
 685    }
 686
 687    return false;
 688}
 689
 690static bool
 691vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
 692{
 693    uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
 694
 695    if (dev->iface->get_protocol_features) {
 696        features |= dev->iface->get_protocol_features(dev);
 697    }
 698
 699    vmsg->payload.u64 = features;
 700    vmsg->size = sizeof(vmsg->payload.u64);
 701
 702    return true;
 703}
 704
 705static bool
 706vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
 707{
 708    uint64_t features = vmsg->payload.u64;
 709
 710    DPRINT("u64: 0x%016"PRIx64"\n", features);
 711
 712    dev->protocol_features = vmsg->payload.u64;
 713
 714    if (dev->iface->set_protocol_features) {
 715        dev->iface->set_protocol_features(dev, features);
 716    }
 717
 718    return false;
 719}
 720
 721static bool
 722vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
 723{
 724    DPRINT("Function %s() not implemented yet.\n", __func__);
 725    return false;
 726}
 727
 728static bool
 729vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
 730{
 731    unsigned int index = vmsg->payload.state.index;
 732    unsigned int enable = vmsg->payload.state.num;
 733
 734    DPRINT("State.index: %d\n", index);
 735    DPRINT("State.enable:   %d\n", enable);
 736
 737    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
 738        vu_panic(dev, "Invalid vring_enable index: %u", index);
 739        return false;
 740    }
 741
 742    dev->vq[index].enable = enable;
 743    return false;
 744}
 745
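/* Dispatch a single request to its handler.  The boolean return value
 * tells the caller whether a reply message must be sent back. */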
 746static bool
 747vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
 748{
 749    int do_reply = 0;
 750
 751    /* Print out generic part of the request. */
 752    DPRINT("================ Vhost user message ================\n");
 753    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
 754           vmsg->request);
 755    DPRINT("Flags:   0x%x\n", vmsg->flags);
 756    DPRINT("Size:    %d\n", vmsg->size);
 757
 758    if (vmsg->fd_num) {
 759        int i;
 760        DPRINT("Fds:");
 761        for (i = 0; i < vmsg->fd_num; i++) {
 762            DPRINT(" %d", vmsg->fds[i]);
 763        }
 764        DPRINT("\n");
 765    }
 766
 767    if (dev->iface->process_msg &&
 768        dev->iface->process_msg(dev, vmsg, &do_reply)) {
 769        return do_reply;
 770    }
 771
 772    switch (vmsg->request) {
 773    case VHOST_USER_GET_FEATURES:
 774        return vu_get_features_exec(dev, vmsg);
 775    case VHOST_USER_SET_FEATURES:
 776        return vu_set_features_exec(dev, vmsg);
 777    case VHOST_USER_GET_PROTOCOL_FEATURES:
 778        return vu_get_protocol_features_exec(dev, vmsg);
 779    case VHOST_USER_SET_PROTOCOL_FEATURES:
 780        return vu_set_protocol_features_exec(dev, vmsg);
 781    case VHOST_USER_SET_OWNER:
 782        return vu_set_owner_exec(dev, vmsg);
 783    case VHOST_USER_RESET_OWNER:
 784        return vu_reset_device_exec(dev, vmsg);
 785    case VHOST_USER_SET_MEM_TABLE:
 786        return vu_set_mem_table_exec(dev, vmsg);
 787    case VHOST_USER_SET_LOG_BASE:
 788        return vu_set_log_base_exec(dev, vmsg);
 789    case VHOST_USER_SET_LOG_FD:
 790        return vu_set_log_fd_exec(dev, vmsg);
 791    case VHOST_USER_SET_VRING_NUM:
 792        return vu_set_vring_num_exec(dev, vmsg);
 793    case VHOST_USER_SET_VRING_ADDR:
 794        return vu_set_vring_addr_exec(dev, vmsg);
 795    case VHOST_USER_SET_VRING_BASE:
 796        return vu_set_vring_base_exec(dev, vmsg);
 797    case VHOST_USER_GET_VRING_BASE:
 798        return vu_get_vring_base_exec(dev, vmsg);
 799    case VHOST_USER_SET_VRING_KICK:
 800        return vu_set_vring_kick_exec(dev, vmsg);
 801    case VHOST_USER_SET_VRING_CALL:
 802        return vu_set_vring_call_exec(dev, vmsg);
 803    case VHOST_USER_SET_VRING_ERR:
 804        return vu_set_vring_err_exec(dev, vmsg);
 805    case VHOST_USER_GET_QUEUE_NUM:
 806        return vu_get_queue_num_exec(dev, vmsg);
 807    case VHOST_USER_SET_VRING_ENABLE:
 808        return vu_set_vring_enable_exec(dev, vmsg);
 809    case VHOST_USER_NONE:
 810        break;
 811    default:
 812        vmsg_close_fds(vmsg);
 813        vu_panic(dev, "Unhandled request: %d", vmsg->request);
 814    }
 815
 816    return false;
 817}
 818
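/* Read and process one message from the vhost-user socket.  Returns
 * false if reading or replying failed, which usually means the caller
 * should tear the connection down. */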
 819bool
 820vu_dispatch(VuDev *dev)
 821{
 822    VhostUserMsg vmsg = { 0, };
 823    int reply_requested;
 824    bool success = false;
 825
 826    if (!vu_message_read(dev, dev->sock, &vmsg)) {
 827        goto end;
 828    }
 829
 830    reply_requested = vu_process_message(dev, &vmsg);
 831    if (!reply_requested) {
 832        success = true;
 833        goto end;
 834    }
 835
 836    if (!vu_message_write(dev, dev->sock, &vmsg)) {
 837        goto end;
 838    }
 839
 840    success = true;
 841
 842end:
 843    g_free(vmsg.data);
 844    return success;
 845}
 846
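/* Release everything held by the device: memory region mappings,
 * per-queue eventfds, the dirty log and the socket. */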
 847void
 848vu_deinit(VuDev *dev)
 849{
 850    int i;
 851
 852    for (i = 0; i < dev->nregions; i++) {
 853        VuDevRegion *r = &dev->regions[i];
 854        void *m = (void *) (uintptr_t) r->mmap_addr;
 855        if (m != MAP_FAILED) {
 856            munmap(m, r->size + r->mmap_offset);
 857        }
 858    }
 859    dev->nregions = 0;
 860
 861    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
 862        VuVirtq *vq = &dev->vq[i];
 863
 864        if (vq->call_fd != -1) {
 865            close(vq->call_fd);
 866            vq->call_fd = -1;
 867        }
 868
 869        if (vq->kick_fd != -1) {
 870            close(vq->kick_fd);
 871            vq->kick_fd = -1;
 872        }
 873
 874        if (vq->err_fd != -1) {
 875            close(vq->err_fd);
 876            vq->err_fd = -1;
 877        }
 878    }
 879
 880
 881    vu_close_log(dev);
 882
 883    if (dev->sock != -1) {
 884        close(dev->sock);
 885    }
 886}
 887
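/* Initialize a VuDev.  All queue eventfds start out as -1 with
 * notification enabled; the panic and watch callbacks are mandatory. */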
 888void
 889vu_init(VuDev *dev,
 890        int socket,
 891        vu_panic_cb panic,
 892        vu_set_watch_cb set_watch,
 893        vu_remove_watch_cb remove_watch,
 894        const VuDevIface *iface)
 895{
 896    int i;
 897
 898    assert(socket >= 0);
 899    assert(set_watch);
 900    assert(remove_watch);
 901    assert(iface);
 902    assert(panic);
 903
 904    memset(dev, 0, sizeof(*dev));
 905
 906    dev->sock = socket;
 907    dev->panic = panic;
 908    dev->set_watch = set_watch;
 909    dev->remove_watch = remove_watch;
 910    dev->iface = iface;
 911    dev->log_call_fd = -1;
 912    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
 913        dev->vq[i] = (VuVirtq) {
 914            .call_fd = -1, .kick_fd = -1, .err_fd = -1,
 915            .notification = true,
 916        };
 917    }
 918}
 919
 920VuVirtq *
 921vu_get_queue(VuDev *dev, int qidx)
 922{
 923    assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
 924    return &dev->vq[qidx];
 925}
 926
 927bool
 928vu_queue_enabled(VuDev *dev, VuVirtq *vq)
 929{
 930    return vq->enable;
 931}
 932
 933static inline uint16_t
 934vring_avail_flags(VuVirtq *vq)
 935{
 936    return vq->vring.avail->flags;
 937}
 938
 939static inline uint16_t
 940vring_avail_idx(VuVirtq *vq)
 941{
 942    vq->shadow_avail_idx = vq->vring.avail->idx;
 943
 944    return vq->shadow_avail_idx;
 945}
 946
 947static inline uint16_t
 948vring_avail_ring(VuVirtq *vq, int i)
 949{
 950    return vq->vring.avail->ring[i];
 951}
 952
 953static inline uint16_t
 954vring_get_used_event(VuVirtq *vq)
 955{
 956    return vring_avail_ring(vq, vq->vring.num);
 957}
 958
 959static int
 960virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
 961{
 962    uint16_t num_heads = vring_avail_idx(vq) - idx;
 963
 964    /* Check it isn't doing very strange things with descriptor numbers. */
 965    if (num_heads > vq->vring.num) {
 966        vu_panic(dev, "Guest moved used index from %u to %u",
 967                 idx, vq->shadow_avail_idx);
 968        return -1;
 969    }
 970    if (num_heads) {
 971        /* On success, callers read a descriptor at vq->last_avail_idx.
 972         * Make sure descriptor read does not bypass avail index read. */
 973        smp_rmb();
 974    }
 975
 976    return num_heads;
 977}
 978
 979static bool
 980virtqueue_get_head(VuDev *dev, VuVirtq *vq,
 981                   unsigned int idx, unsigned int *head)
 982{
 983    /* Grab the next descriptor number they're advertising, and increment
 984     * the index we've seen. */
 985    *head = vring_avail_ring(vq, idx % vq->vring.num);
 986
 987    /* If their number is silly, that's a fatal mistake. */
 988    if (*head >= vq->vring.num) {
 989        vu_panic(dev, "Guest says index %u is available", *head);
 990        return false;
 991    }
 992
 993    return true;
 994}
 995
 996enum {
 997    VIRTQUEUE_READ_DESC_ERROR = -1,
 998    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
 999    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1000};
1001
1002static int
1003virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
1004                         int i, unsigned int max, unsigned int *next)
1005{
1006    /* If this descriptor says it doesn't chain, we're done. */
1007    if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
1008        return VIRTQUEUE_READ_DESC_DONE;
1009    }
1010
1011    /* Check they're not leading us off end of descriptors. */
1012    *next = desc[i].next;
1013    /* Make sure compiler knows to grab that: we don't want it changing! */
1014    smp_wmb();
1015
1016    if (*next >= max) {
1017        vu_panic(dev, "Desc next is %u", *next);
1018        return VIRTQUEUE_READ_DESC_ERROR;
1019    }
1020
1021    return VIRTQUEUE_READ_DESC_MORE;
1022}
1023
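/* Walk the available ring (following indirect tables) and report how
 * many writable (in) and readable (out) bytes are queued, stopping
 * early once both maxima have been reached. */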
1024void
1025vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
1026                         unsigned int *out_bytes,
1027                         unsigned max_in_bytes, unsigned max_out_bytes)
1028{
1029    unsigned int idx;
1030    unsigned int total_bufs, in_total, out_total;
1031    int rc;
1032
1033    idx = vq->last_avail_idx;
1034
1035    total_bufs = in_total = out_total = 0;
1036    if (unlikely(dev->broken) ||
1037        unlikely(!vq->vring.avail)) {
1038        goto done;
1039    }
1040
1041    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
1042        unsigned int max, num_bufs, indirect = 0;
1043        struct vring_desc *desc;
1044        unsigned int i;
1045
1046        max = vq->vring.num;
1047        num_bufs = total_bufs;
1048        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
1049            goto err;
1050        }
1051        desc = vq->vring.desc;
1052
1053        if (desc[i].flags & VRING_DESC_F_INDIRECT) {
1054            if (desc[i].len % sizeof(struct vring_desc)) {
1055                vu_panic(dev, "Invalid size for indirect buffer table");
1056                goto err;
1057            }
1058
1059            /* If we've got too many, that implies a descriptor loop. */
1060            if (num_bufs >= max) {
1061                vu_panic(dev, "Looped descriptor");
1062                goto err;
1063            }
1064
1065            /* loop over the indirect descriptor table */
1066            indirect = 1;
1067            max = desc[i].len / sizeof(struct vring_desc);
1068            desc = vu_gpa_to_va(dev, desc[i].addr);
1069            num_bufs = i = 0;
1070        }
1071
1072        do {
1073            /* If we've got too many, that implies a descriptor loop. */
1074            if (++num_bufs > max) {
1075                vu_panic(dev, "Looped descriptor");
1076                goto err;
1077            }
1078
1079            if (desc[i].flags & VRING_DESC_F_WRITE) {
1080                in_total += desc[i].len;
1081            } else {
1082                out_total += desc[i].len;
1083            }
1084            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1085                goto done;
1086            }
1087            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
1088        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1089
1090        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1091            goto err;
1092        }
1093
1094        if (!indirect) {
1095            total_bufs = num_bufs;
1096        } else {
1097            total_bufs++;
1098        }
1099    }
1100    if (rc < 0) {
1101        goto err;
1102    }
1103done:
1104    if (in_bytes) {
1105        *in_bytes = in_total;
1106    }
1107    if (out_bytes) {
1108        *out_bytes = out_total;
1109    }
1110    return;
1111
1112err:
1113    in_total = out_total = 0;
1114    goto done;
1115}
1116
1117bool
1118vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
1119                     unsigned int out_bytes)
1120{
1121    unsigned int in_total, out_total;
1122
1123    vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
1124                             in_bytes, out_bytes);
1125
1126    return in_bytes <= in_total && out_bytes <= out_total;
1127}
1128
1129/* Fetch avail_idx from VQ memory only when we really need to know if
1130 * guest has added some buffers. */
1131bool
1132vu_queue_empty(VuDev *dev, VuVirtq *vq)
1133{
1134    if (unlikely(dev->broken) ||
1135        unlikely(!vq->vring.avail)) {
1136        return true;
1137    }
1138
1139    if (vq->shadow_avail_idx != vq->last_avail_idx) {
1140        return false;
1141    }
1142
1143    return vring_avail_idx(vq) == vq->last_avail_idx;
1144}
1145
1146static inline
1147bool has_feature(uint64_t features, unsigned int fbit)
1148{
1149    assert(fbit < 64);
1150    return !!(features & (1ULL << fbit));
1151}
1152
1153static inline
1154bool vu_has_feature(VuDev *dev,
1155                    unsigned int fbit)
1156{
1157    return has_feature(dev->features, fbit);
1158}
1159
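/* Decide whether the guest must be signalled, honouring
 * VIRTIO_RING_F_EVENT_IDX and the NO_INTERRUPT suppression flag. */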
1160static bool
1161vring_notify(VuDev *dev, VuVirtq *vq)
1162{
1163    uint16_t old, new;
1164    bool v;
1165
1166    /* We need to expose used array entries before checking used event. */
1167    smp_mb();
1168
1169    /* Always notify when queue is empty (when feature acknowledge) */
1170    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1171        !vq->inuse && vu_queue_empty(dev, vq)) {
1172        return true;
1173    }
1174
1175    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
1176        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1177    }
1178
1179    v = vq->signalled_used_valid;
1180    vq->signalled_used_valid = true;
1181    old = vq->signalled_used;
1182    new = vq->signalled_used = vq->used_idx;
1183    return !v || vring_need_event(vring_get_used_event(vq), new, old);
1184}
1185
1186void
1187vu_queue_notify(VuDev *dev, VuVirtq *vq)
1188{
1189    if (unlikely(dev->broken) ||
1190        unlikely(!vq->vring.avail)) {
1191        return;
1192    }
1193
1194    if (!vring_notify(dev, vq)) {
1195        DPRINT("skipped notify...\n");
1196        return;
1197    }
1198
1199    if (eventfd_write(vq->call_fd, 1) < 0) {
1200        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
1201    }
1202}
1203
1204static inline void
1205vring_used_flags_set_bit(VuVirtq *vq, int mask)
1206{
1207    uint16_t *flags;
1208
1209    flags = (uint16_t *)((char*)vq->vring.used +
1210                         offsetof(struct vring_used, flags));
1211    *flags |= mask;
1212}
1213
1214static inline void
1215vring_used_flags_unset_bit(VuVirtq *vq, int mask)
1216{
1217    uint16_t *flags;
1218
1219    flags = (uint16_t *)((char*)vq->vring.used +
1220                         offsetof(struct vring_used, flags));
1221    *flags &= ~mask;
1222}
1223
1224static inline void
1225vring_set_avail_event(VuVirtq *vq, uint16_t val)
1226{
1227    if (!vq->notification) {
1228        return;
1229    }
1230
1231    *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
1232}
1233
1234void
1235vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
1236{
1237    vq->notification = enable;
1238    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
1239        vring_set_avail_event(vq, vring_avail_idx(vq));
1240    } else if (enable) {
1241        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
1242    } else {
1243        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
1244    }
1245    if (enable) {
1246        /* Expose avail event/used flags before caller checks the avail idx. */
1247        smp_mb();
1248    }
1249}
1250
1251static void
1252virtqueue_map_desc(VuDev *dev,
1253                   unsigned int *p_num_sg, struct iovec *iov,
1254                   unsigned int max_num_sg, bool is_write,
1255                   uint64_t pa, size_t sz)
1256{
1257    unsigned num_sg = *p_num_sg;
1258
1259    assert(num_sg <= max_num_sg);
1260
1261    if (!sz) {
1262        vu_panic(dev, "virtio: zero sized buffers are not allowed");
1263        return;
1264    }
1265
1266    iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
1267    iov[num_sg].iov_len = sz;
1268    num_sg++;
1269
1270    *p_num_sg = num_sg;
1271}
1272
1273/* Round number down to multiple */
1274#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
1275
1276/* Round number up to multiple */
1277#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
1278
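/* Allocate a VuVirtqElement of size sz with its in_sg/out_sg arrays
 * laid out in the same allocation, directly after the element itself. */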
1279static void *
1280virtqueue_alloc_element(size_t sz,
1281                                     unsigned out_num, unsigned in_num)
1282{
1283    VuVirtqElement *elem;
1284    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
1285    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1286    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1287
1288    assert(sz >= sizeof(VuVirtqElement));
1289    elem = malloc(out_sg_end);
1290    elem->out_num = out_num;
1291    elem->in_num = in_num;
1292    elem->in_sg = (void *)elem + in_sg_ofs;
1293    elem->out_sg = (void *)elem + out_sg_ofs;
1294    return elem;
1295}
1296
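/* Pop the next available descriptor chain and map it into a newly
 * allocated element.  Returns NULL if the device is broken, the queue
 * is empty or the descriptors are malformed; the caller frees the
 * element with free(). */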
1297void *
1298vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
1299{
1300    unsigned int i, head, max;
1301    VuVirtqElement *elem;
1302    unsigned out_num, in_num;
1303    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1304    struct vring_desc *desc;
1305    int rc;
1306
1307    if (unlikely(dev->broken) ||
1308        unlikely(!vq->vring.avail)) {
1309        return NULL;
1310    }
1311
1312    if (vu_queue_empty(dev, vq)) {
1313        return NULL;
1314    }
1315    /* Needed after virtio_queue_empty(), see comment in
1316     * virtqueue_num_heads(). */
1317    smp_rmb();
1318
1319    /* When we start there are none of either input nor output. */
1320    out_num = in_num = 0;
1321
1322    max = vq->vring.num;
1323    if (vq->inuse >= vq->vring.num) {
1324        vu_panic(dev, "Virtqueue size exceeded");
1325        return NULL;
1326    }
1327
1328    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
1329        return NULL;
1330    }
1331
1332    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
1333        vring_set_avail_event(vq, vq->last_avail_idx);
1334    }
1335
1336    i = head;
1337    desc = vq->vring.desc;
1338    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
1339        if (desc[i].len % sizeof(struct vring_desc)) {
1340            vu_panic(dev, "Invalid size for indirect buffer table");
1341        }
1342
1343        /* loop over the indirect descriptor table */
1344        max = desc[i].len / sizeof(struct vring_desc);
1345        desc = vu_gpa_to_va(dev, desc[i].addr);
1346        i = 0;
1347    }
1348
1349    /* Collect all the descriptors */
1350    do {
1351        if (desc[i].flags & VRING_DESC_F_WRITE) {
1352            virtqueue_map_desc(dev, &in_num, iov + out_num,
1353                               VIRTQUEUE_MAX_SIZE - out_num, true,
1354                               desc[i].addr, desc[i].len);
1355        } else {
1356            if (in_num) {
1357                vu_panic(dev, "Incorrect order for descriptors");
1358                return NULL;
1359            }
1360            virtqueue_map_desc(dev, &out_num, iov,
1361                               VIRTQUEUE_MAX_SIZE, false,
1362                               desc[i].addr, desc[i].len);
1363        }
1364
1365        /* If we've got too many, that implies a descriptor loop. */
1366        if ((in_num + out_num) > max) {
1367            vu_panic(dev, "Looped descriptor");
1368        }
1369        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
1370    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1371
1372    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1373        return NULL;
1374    }
1375
1376    /* Now copy what we have collected and mapped */
1377    elem = virtqueue_alloc_element(sz, out_num, in_num);
1378    elem->index = head;
1379    for (i = 0; i < out_num; i++) {
1380        elem->out_sg[i] = iov[i];
1381    }
1382    for (i = 0; i < in_num; i++) {
1383        elem->in_sg[i] = iov[out_num + i];
1384    }
1385
1386    vq->inuse++;
1387
1388    return elem;
1389}
1390
1391bool
1392vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
1393{
1394    if (num > vq->inuse) {
1395        return false;
1396    }
1397    vq->last_avail_idx -= num;
1398    vq->inuse -= num;
1399    return true;
1400}
1401
1402static inline
1403void vring_used_write(VuDev *dev, VuVirtq *vq,
1404                      struct vring_used_elem *uelem, int i)
1405{
1406    struct vring_used *used = vq->vring.used;
1407
1408    used->ring[i] = *uelem;
1409    vu_log_write(dev, vq->vring.log_guest_addr +
1410                 offsetof(struct vring_used, ring[i]),
1411                 sizeof(used->ring[i]));
1412}
1413
1414
1415static void
1416vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
1417                  const VuVirtqElement *elem,
1418                  unsigned int len)
1419{
1420    struct vring_desc *desc = vq->vring.desc;
1421    unsigned int i, max, min;
1422    unsigned num_bufs = 0;
1423
1424    max = vq->vring.num;
1425    i = elem->index;
1426
1427    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
1428        if (desc[i].len % sizeof(struct vring_desc)) {
1429            vu_panic(dev, "Invalid size for indirect buffer table");
1430        }
1431
1432        /* loop over the indirect descriptor table */
1433        max = desc[i].len / sizeof(struct vring_desc);
1434        desc = vu_gpa_to_va(dev, desc[i].addr);
1435        i = 0;
1436    }
1437
1438    do {
1439        if (++num_bufs > max) {
1440            vu_panic(dev, "Looped descriptor");
1441            return;
1442        }
1443
1444        if (desc[i].flags & VRING_DESC_F_WRITE) {
1445            min = MIN(desc[i].len, len);
1446            vu_log_write(dev, desc[i].addr, min);
1447            len -= min;
1448        }
1449
1450    } while (len > 0 &&
1451             (virtqueue_read_next_desc(dev, desc, i, max, &i)
1452              == VIRTQUEUE_READ_DESC_MORE));
1453}
1454
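/* Record a completed element in the used ring at offset idx past the
 * current used index, logging the write when dirty logging is active. */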
1455void
1456vu_queue_fill(VuDev *dev, VuVirtq *vq,
1457              const VuVirtqElement *elem,
1458              unsigned int len, unsigned int idx)
1459{
1460    struct vring_used_elem uelem;
1461
1462    if (unlikely(dev->broken) ||
1463        unlikely(!vq->vring.avail)) {
1464        return;
1465    }
1466
1467    vu_log_queue_fill(dev, vq, elem, len);
1468
1469    idx = (idx + vq->used_idx) % vq->vring.num;
1470
1471    uelem.id = elem->index;
1472    uelem.len = len;
1473    vring_used_write(dev, vq, &uelem, idx);
1474}
1475
1476static inline
1477void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
1478{
1479    vq->vring.used->idx = val;
1480    vu_log_write(dev,
1481                 vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
1482                 sizeof(vq->vring.used->idx));
1483
1484    vq->used_idx = val;
1485}
1486
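/* Publish the last count filled elements by advancing the used index,
 * with a write barrier so the guest sees the ring entries first. */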
1487void
1488vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
1489{
1490    uint16_t old, new;
1491
1492    if (unlikely(dev->broken) ||
1493        unlikely(!vq->vring.avail)) {
1494        return;
1495    }
1496
1497    /* Make sure buffer is written before we update index. */
1498    smp_wmb();
1499
1500    old = vq->used_idx;
1501    new = old + count;
1502    vring_used_idx_set(dev, vq, new);
1503    vq->inuse -= count;
1504    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
1505        vq->signalled_used_valid = false;
1506    }
1507}
1508
1509void
1510vu_queue_push(VuDev *dev, VuVirtq *vq,
1511              const VuVirtqElement *elem, unsigned int len)
1512{
1513    vu_queue_fill(dev, vq, elem, len, 0);
1514    vu_queue_flush(dev, vq, 1);
1515}
1516