qemu/contrib/libvhost-user/libvhost-user.h
<<
>>
Prefs
   1/*
   2 * Vhost User library
   3 *
   4 * Copyright (c) 2016 Red Hat, Inc.
   5 *
   6 * Authors:
   7 *  Victor Kaplansky <victork@redhat.com>
   8 *  Marc-André Lureau <mlureau@redhat.com>
   9 *
  10 * This work is licensed under the terms of the GNU GPL, version 2 or
  11 * later.  See the COPYING file in the top-level directory.
  12 */
  13
  14#ifndef LIBVHOST_USER_H
  15#define LIBVHOST_USER_H
  16
  17#include <stdint.h>
  18#include <stdbool.h>
  19#include <stddef.h>
  20#include <sys/poll.h>
  21#include <linux/vhost.h>
  22#include "standard-headers/linux/virtio_ring.h"
  23
/* Based on qemu/hw/virtio/vhost-user.c */
/* Bit number of the protocol-features flag -- presumably a position in the
 * 64-bit feature word handled by the get_features/set_features callbacks;
 * confirm against the file named above. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Not referenced in this header; presumably the number of guest bytes
 * covered by one bit of the dirty log -- TODO confirm. */
#define VHOST_LOG_PAGE 4096

/* Not referenced in this header; presumably the largest supported number of
 * entries in one virtqueue -- TODO confirm. */
#define VIRTQUEUE_MAX_SIZE 1024

/* Length of the regions[] arrays in VhostUserMemory and VuDev and of the
 * fds[] array in VhostUserMsg. */
#define VHOST_MEMORY_MAX_NREGIONS 8
  31
  32typedef enum VhostSetConfigType {
  33    VHOST_SET_CONFIG_TYPE_MASTER = 0,
  34    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
  35} VhostSetConfigType;
  36
/*
 * Maximum size, in bytes, of the virtio device config space; it is the
 * length of the region[] buffer in VhostUserConfig.
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
  41
  42enum VhostUserProtocolFeature {
  43    VHOST_USER_PROTOCOL_F_MQ = 0,
  44    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
  45    VHOST_USER_PROTOCOL_F_RARP = 2,
  46    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
  47    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
  48    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
  49    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  50    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  51    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
  52    VHOST_USER_PROTOCOL_F_CONFIG = 9,
  53    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
  54    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
  55    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
  56
  57    VHOST_USER_PROTOCOL_F_MAX
  58};
  59
  60#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
  61
  62typedef enum VhostUserRequest {
  63    VHOST_USER_NONE = 0,
  64    VHOST_USER_GET_FEATURES = 1,
  65    VHOST_USER_SET_FEATURES = 2,
  66    VHOST_USER_SET_OWNER = 3,
  67    VHOST_USER_RESET_OWNER = 4,
  68    VHOST_USER_SET_MEM_TABLE = 5,
  69    VHOST_USER_SET_LOG_BASE = 6,
  70    VHOST_USER_SET_LOG_FD = 7,
  71    VHOST_USER_SET_VRING_NUM = 8,
  72    VHOST_USER_SET_VRING_ADDR = 9,
  73    VHOST_USER_SET_VRING_BASE = 10,
  74    VHOST_USER_GET_VRING_BASE = 11,
  75    VHOST_USER_SET_VRING_KICK = 12,
  76    VHOST_USER_SET_VRING_CALL = 13,
  77    VHOST_USER_SET_VRING_ERR = 14,
  78    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
  79    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
  80    VHOST_USER_GET_QUEUE_NUM = 17,
  81    VHOST_USER_SET_VRING_ENABLE = 18,
  82    VHOST_USER_SEND_RARP = 19,
  83    VHOST_USER_NET_SET_MTU = 20,
  84    VHOST_USER_SET_SLAVE_REQ_FD = 21,
  85    VHOST_USER_IOTLB_MSG = 22,
  86    VHOST_USER_SET_VRING_ENDIAN = 23,
  87    VHOST_USER_GET_CONFIG = 24,
  88    VHOST_USER_SET_CONFIG = 25,
  89    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
  90    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
  91    VHOST_USER_POSTCOPY_ADVISE  = 28,
  92    VHOST_USER_POSTCOPY_LISTEN  = 29,
  93    VHOST_USER_POSTCOPY_END     = 30,
  94    VHOST_USER_GET_INFLIGHT_FD = 31,
  95    VHOST_USER_SET_INFLIGHT_FD = 32,
  96    VHOST_USER_GPU_SET_SOCKET = 33,
  97    VHOST_USER_MAX
  98} VhostUserRequest;
  99
 100typedef enum VhostUserSlaveRequest {
 101    VHOST_USER_SLAVE_NONE = 0,
 102    VHOST_USER_SLAVE_IOTLB_MSG = 1,
 103    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
 104    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
 105    VHOST_USER_SLAVE_MAX
 106}  VhostUserSlaveRequest;
 107
/*
 * Description of one memory region as carried in a VhostUserMemory payload.
 * The four fields parallel gpa, size, qva and mmap_offset of VuDevRegion;
 * NOTE(review): presumably they carry the same meaning -- confirm.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
 114
/*
 * Memory table payload: a region count followed by a fixed-capacity array
 * of region descriptions.
 */
typedef struct VhostUserMemory {
    /* Presumably the number of valid entries in regions[] -- confirm. */
    uint32_t nregions;
    /* Explicit padding; together with nregions it places regions[] at
     * offset 8. */
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
 120
/*
 * Log payload (the "log" member of the VhostUserMsg payload union).
 * NOTE(review): presumably the size of, and offset into, the mapping that
 * holds the dirty log -- confirm against the message handler.
 */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
 125
 126typedef struct VhostUserConfig {
 127    uint32_t offset;
 128    uint32_t size;
 129    uint32_t flags;
 130    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
 131} VhostUserConfig;
 132
 133static VhostUserConfig c __attribute__ ((unused));
 134#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
 135                                   + sizeof(c.size) \
 136                                   + sizeof(c.flags))
 137
/*
 * Vring area payload (the "area" member of the VhostUserMsg payload union).
 * NOTE(review): presumably used for the host notifier message, with u64
 * holding the vring index/flags and size/offset describing the area in the
 * passed fd -- confirm against vu_set_queue_host_notifier().
 */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
 143
/*
 * Inflight payload (the "inflight" member of the VhostUserMsg payload
 * union).  queue_size is where the slave learns the length of the
 * VuVirtqInflight desc[] array.
 * NOTE(review): mmap_size/mmap_offset presumably describe the shared
 * inflight mapping and num_queues the number of queues it covers -- confirm.
 */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
 150
/*
 * Attribute that packs a structure.  On Windows x86/x86-64 builds the
 * gcc_struct attribute is added as well; NOTE(review): presumably so that
 * GCC's own layout rules are used instead of the MS-compatible ones --
 * see the GCC type attribute documentation.
 */
#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif
 156
/*
 * A vhost-user message: request code, flags and payload size followed by a
 * request-specific payload.  The structure is VU_PACKED, so no padding is
 * inserted between its members.
 *
 * NOTE(review): fds, fd_num and data come after the payload and look like
 * local bookkeeping rather than part of the message proper -- confirm
 * against the send/receive code.
 */
typedef struct VhostUserMsg {
    /* Request code; presumably a VhostUserRequest value. */
    int request;

/* Bit fields of the flags member below. */
#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */

    /* Request-specific payload; which member applies presumably depends
     * on the request code. */
    union {
/* NOTE(review): presumably fields of u64 for the vring messages that pass a
 * queue index (low 8 bits) and a "no fd" flag (bit 8) -- confirm. */
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        VhostUserConfig config;
        VhostUserVringArea area;
        VhostUserInflight inflight;
    } payload;

    /* File descriptors associated with the message; presumably fd_num of
     * them are valid. */
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
    /* NOTE(review): purpose not visible in this header -- confirm. */
    uint8_t *data;
} VU_PACKED VhostUserMsg;
 183
/*
 * One guest memory region as tracked by a VuDev (see VuDev.regions).
 */
typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;
 196
typedef struct VuDev VuDev;

/*
 * Callback types for the members of VuDevIface; each receives the VuDev it
 * is invoked for as its first argument.
 */
/* Returns a 64-bit features bitmask; used for both the get_features and the
 * get_protocol_features members. */
typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
/* Receives a 64-bit features bitmask to enable; used for both the
 * set_features and the set_protocol_features members. */
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
/* Called for each vhost-user message received; a non-zero return value
 * makes libvhost-user skip its own processing.  NOTE(review): @do_reply is
 * presumably set by the callback to request that a reply be sent --
 * confirm. */
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
                                  int *do_reply);
/* Tells when queue @qidx can (or can no longer) be processed. */
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
/* Returns whether queue @qidx is processed in order. */
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
/* Gets the config space of the device into @config, a buffer of @len bytes.
 * NOTE(review): return convention presumably 0 on success -- confirm. */
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
/* Sets @size bytes of the device config space at @offset from @data.
 * NOTE(review): return convention presumably 0 on success -- confirm. */
typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags);
 209
/*
 * Device callbacks supplied to vu_init() and kept in VuDev.iface.
 */
typedef struct VuDevIface {
    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
    vu_get_features_cb get_features;
    /* enable vhost implementation features */
    vu_set_features_cb set_features;
    /* get the protocol feature bitmask from the underlying vhost
     * implementation */
    vu_get_features_cb get_protocol_features;
    /* enable protocol features in the underlying vhost implementation. */
    vu_set_features_cb set_protocol_features;
    /* process_msg is called for each vhost-user message received */
    /* skip libvhost-user processing if return value != 0 */
    vu_process_msg_cb process_msg;
    /* tells when queues can be processed */
    vu_queue_set_started_cb queue_set_started;
    /*
     * Tells whether the queue is processed in order, in which case it
     * will be resumed to vring.used->idx. This can help to support
     * resuming on unmanaged exit/crash.
     */
    vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
    /* get the config space of the device */
    vu_get_config_cb get_config;
    /* set the config space of the device */
    vu_set_config_cb set_config;
} VuDevIface;
 236
/* Per-queue handler installed with vu_set_queue_handler(); receives the
 * device and the index of the queue. */
typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);

/*
 * The vring of one virtqueue: pointers to its descriptor table, available
 * ring and used ring.
 */
typedef struct VuRing {
    /* Presumably the number of entries in the ring -- confirm. */
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    /* NOTE(review): presumably the guest address used when logging writes
     * to the used ring -- confirm. */
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;
 247
 248typedef struct VuDescStateSplit {
 249    /* Indicate whether this descriptor is inflight or not.
 250     * Only available for head-descriptor. */
 251    uint8_t inflight;
 252
 253    /* Padding */
 254    uint8_t padding[5];
 255
 256    /* Maintain a list for the last batch of used descriptors.
 257     * Only available when batching is used for submitting */
 258    uint16_t next;
 259
 260    /* Used to preserve the order of fetching available descriptors.
 261     * Only available for head-descriptor. */
 262    uint64_t counter;
 263} VuDescStateSplit;
 264
 265typedef struct VuVirtqInflight {
 266    /* The feature flags of this region. Now it's initialized to 0. */
 267    uint64_t features;
 268
 269    /* The version of this region. It's 1 currently.
 270     * Zero value indicates a vm reset happened. */
 271    uint16_t version;
 272
 273    /* The size of VuDescStateSplit array. It's equal to the virtqueue
 274     * size. Slave could get it from queue size field of VhostUserInflight. */
 275    uint16_t desc_num;
 276
 277    /* The head of list that track the last batch of used descriptors. */
 278    uint16_t last_batch_head;
 279
 280    /* Storing the idx value of used ring */
 281    uint16_t used_idx;
 282
 283    /* Used to track the state of each descriptor in descriptor table */
 284    VuDescStateSplit desc[0];
 285} VuVirtqInflight;
 286
/*
 * Entry of VuVirtq.resubmit_list.  NOTE(review): presumably the index of an
 * inflight descriptor together with its VuDescStateSplit counter, which
 * records fetch order -- confirm against the resubmit code.
 */
typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;
 291
/*
 * State of one virtqueue of a VuDev.
 */
typedef struct VuVirtq {
    VuRing vring;

    /* Inflight tracking region of this queue. */
    VuVirtqInflight *inflight;

    /* Array of resubmit_num entries; NOTE(review): presumably the inflight
     * descriptors to be resubmitted -- confirm. */
    VuVirtqInflightDesc *resubmit_list;

    uint16_t resubmit_num;

    uint64_t counter;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Presumably whether signalled_used holds a valid value */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    /* Queue handler set through vu_set_queue_handler() */
    vu_queue_handler_cb handler;

    int call_fd;
    int kick_fd;
    int err_fd;
    unsigned int enable;
    bool started;
} VuVirtq;
 330
 331enum VuWatchCondtion {
 332    VU_WATCH_IN = POLLIN,
 333    VU_WATCH_OUT = POLLOUT,
 334    VU_WATCH_PRI = POLLPRI,
 335    VU_WATCH_ERR = POLLERR,
 336    VU_WATCH_HUP = POLLHUP,
 337};
 338
/* Reports an unrecoverable error on @dev, described by @err. */
typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
/* Callback of a watch.  NOTE(review): @condition is presumably the
 * VU_WATCH_* condition(s) that were met and @data the pointer registered
 * with the watch -- confirm. */
typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
/* Adds or updates @fd in the watch set; @cb is to be called when
 * @condition is met. */
typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
                                 vu_watch_cb cb, void *data);
/* Removes @fd from the watch set. */
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
 344
/*
 * Inflight bookkeeping of a VuDev (see VuDev.inflight_info).
 * NOTE(review): presumably the file descriptor, mapped address and size of
 * the shared inflight region -- confirm against the inflight fd handlers.
 */
typedef struct VuDevInflightInfo {
    int fd;
    void *addr;
    uint64_t size;
} VuDevInflightInfo;
 350
/*
 * A vhost-user device context, initialized by vu_init() and cleaned up by
 * vu_deinit().
 */
struct VuDev {
    /* Presumably the socket connected to the vhost-user master that was
     * passed to vu_init(). */
    int sock;
    /* Presumably the number of valid entries in regions[]. */
    uint32_t nregions;
    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    /* Virtqueues of the device; presumably an array of max_queues
     * entries -- confirm. */
    VuVirtq *vq;
    VuDevInflightInfo inflight_info;
    int log_call_fd;
    int slave_fd;
    uint64_t log_size;
    uint8_t *log_table;
    /* Feature and protocol feature bitmasks (64-bit words). */
    uint64_t features;
    uint64_t protocol_features;
    bool broken;
    uint16_t max_queues;

    /* @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met */
    vu_set_watch_cb set_watch;

    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;

    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    vu_panic_cb panic;
    /* Device callbacks */
    const VuDevIface *iface;

    /* Postcopy data */
    int postcopy_ufd;
    bool postcopy_listening;
};
 382
/*
 * An element popped from a virtqueue by vu_queue_pop().
 * NOTE(review): index is presumably the head descriptor index, and
 * in_sg/out_sg arrays of in_num/out_num iovecs -- confirm.  struct iovec is
 * declared in <sys/uio.h>, which this header does not include itself.
 */
typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
 390
/**
 * vu_init:
 * @dev: a VuDev context
 * @max_queues: maximum number of virtqueues
 * @socket: the socket connected to vhost-user master
 * @panic: a panic callback
 * @set_watch: a set_watch callback
 * @remove_watch: a remove_watch callback
 * @iface: a VuDevIface structure with vhost-user device callbacks
 *
 * Initializes a VuDev vhost-user context.
 *
 * Returns: true on success, false on failure.
 **/
bool vu_init(VuDev *dev,
             uint16_t max_queues,
             int socket,
             vu_panic_cb panic,
             vu_set_watch_cb set_watch,
             vu_remove_watch_cb remove_watch,
             const VuDevIface *iface);
 412
 413
/**
 * vu_deinit:
 * @dev: a VuDev context
 *
 * Cleans up the VuDev context
 */
void vu_deinit(VuDev *dev);

/**
 * vu_dispatch:
 * @dev: a VuDev context
 *
 * Process one vhost-user message.
 *
 * Returns: true on success, false on failure.
 */
bool vu_dispatch(VuDev *dev);
 431
/**
 * vu_gpa_to_va:
 * @dev: a VuDev context
 * @plen: pointer to the guest memory size (NOTE(review): being passed by
 *        pointer, it is presumably updated on return -- confirm)
 * @guest_addr: guest address
 *
 * Translate a guest address to a pointer. Returns NULL on failure.
 */
void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);

/**
 * vu_get_queue:
 * @dev: a VuDev context
 * @qidx: queue index
 *
 * Returns: the queue with index @qidx.
 */
VuVirtq *vu_get_queue(VuDev *dev, int qidx);
 450
/**
 * vu_set_queue_handler:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @handler: the queue handler callback
 *
 * Set the queue handler. This function may be called several times
 * for the same queue. If called with NULL @handler, the handler is
 * removed.
 */
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);

/**
 * vu_set_queue_host_notifier:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @fd: a file descriptor
 * @size: host page size
 * @offset: notifier offset in @fd file
 *
 * Set queue's host notifier. This function may be called several
 * times for the same queue. If called with -1 @fd, the notifier
 * is removed.
 *
 * Returns: NOTE(review): not documented here; presumably true on success
 * and false on failure -- confirm against the implementation.
 */
bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                int size, int offset);
 478
/**
 * vu_queue_set_notification:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @enable: state
 *
 * Set whether the queue notifies (via event index or interrupt)
 */
void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);

/**
 * vu_queue_enabled:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is enabled.
 */
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_started:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is started.
 */
bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);

/**
 * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: true if the queue is empty or not ready.
 */
bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
 515
/**
 * vu_queue_notify:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 */
void vu_queue_notify(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_pop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @sz: the size of struct to return (must be >= sizeof(VuVirtqElement))
 *
 * Returns: a VuVirtqElement filled from the queue or NULL. The
 * returned element must be free()-d by the caller.
 */
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
 535
 536
/**
 * vu_queue_unpop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: The #VuVirtqElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to vu_queue_pop() will refetch the element.
 */
void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
                    size_t len);

/**
 * vu_queue_rewind:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * vu_queue_pop() will refetch the oldest element.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
 563
/**
 * vu_queue_fill:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 * @idx: optional offset for the used ring index (0 in general)
 *
 * Fill the used ring with @elem element.
 */
void vu_queue_fill(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem,
                   unsigned int len, unsigned int idx);

/**
 * vu_queue_push:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 *
 * Helper that combines vu_queue_fill() with a vu_queue_flush().
 */
void vu_queue_push(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem, unsigned int len);

/**
 * vu_queue_flush:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to flush
 *
 * Mark the last number of elements as done (used.idx is updated by
 * num elements).
 */
void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
 600
/**
 * vu_queue_get_avail_bytes:
 * @vdev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: in bytes
 * @out_bytes: out bytes
 * @max_in_bytes: stop counting after max_in_bytes
 * @max_out_bytes: stop counting after max_out_bytes
 *
 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
 * The counts are returned through @in_bytes and @out_bytes.
 */
void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
                              unsigned int *out_bytes,
                              unsigned max_in_bytes, unsigned max_out_bytes);

/**
 * vu_queue_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: expected in bytes
 * @out_bytes: expected out bytes
 *
 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
 * (NOTE(review): in_total/out_total are presumably the available byte
 * counts as computed by vu_queue_get_avail_bytes() -- confirm)
 */
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                          unsigned int out_bytes);
 627
 628#endif /* LIBVHOST_USER_H */
 629