qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "qapi/qmp/qdict.h"
  17#include "qapi/qapi-commands-virtio.h"
  18#include "qapi/qapi-commands-qom.h"
  19#include "qapi/qapi-visit-virtio.h"
  20#include "qapi/qmp/qjson.h"
  21#include "cpu.h"
  22#include "trace.h"
  23#include "qemu/error-report.h"
  24#include "qemu/log.h"
  25#include "qemu/main-loop.h"
  26#include "qemu/module.h"
  27#include "qom/object_interfaces.h"
  28#include "hw/virtio/virtio.h"
  29#include "migration/qemu-file-types.h"
  30#include "qemu/atomic.h"
  31#include "hw/virtio/virtio-bus.h"
  32#include "hw/qdev-properties.h"
  33#include "hw/virtio/virtio-access.h"
  34#include "sysemu/dma.h"
  35#include "sysemu/runstate.h"
  36#include "standard-headers/linux/virtio_ids.h"
  37#include "standard-headers/linux/vhost_types.h"
  38#include "standard-headers/linux/virtio_blk.h"
  39#include "standard-headers/linux/virtio_console.h"
  40#include "standard-headers/linux/virtio_gpu.h"
  41#include "standard-headers/linux/virtio_net.h"
  42#include "standard-headers/linux/virtio_scsi.h"
  43#include "standard-headers/linux/virtio_i2c.h"
  44#include "standard-headers/linux/virtio_balloon.h"
  45#include "standard-headers/linux/virtio_iommu.h"
  46#include "standard-headers/linux/virtio_mem.h"
  47#include "standard-headers/linux/virtio_vsock.h"
  48#include CONFIG_DEVICES
  49
  50/* QAPI list of realized VirtIODevices */
  51static QTAILQ_HEAD(, VirtIODevice) virtio_list;
  52
  53/*
  54 * Maximum size of virtio device config space
  55 */
  56#define VHOST_USER_MAX_CONFIG_SIZE 256
  57
  58#define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \
  59    { .virtio_bit = name, .feature_desc = desc }
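/*
 * For illustration: each FEATURE_ENTRY(...) invocation below expands to a
 * qmp_virtio_feature_map_t compound literal, e.g.
 *
 *   FEATURE_ENTRY(VIRTIO_F_VERSION_1, "VIRTIO_F_VERSION_1: ...")
 *     -> (qmp_virtio_feature_map_t){ .virtio_bit = VIRTIO_F_VERSION_1,
 *                                    .feature_desc = "VIRTIO_F_VERSION_1: ..." }
 */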
  60
  61enum VhostUserProtocolFeature {
  62    VHOST_USER_PROTOCOL_F_MQ = 0,
  63    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
  64    VHOST_USER_PROTOCOL_F_RARP = 2,
  65    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
  66    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
  67    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
  68    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  69    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  70    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
  71    VHOST_USER_PROTOCOL_F_CONFIG = 9,
  72    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
  73    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
  74    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
  75    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
  76    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
  77    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
  78    VHOST_USER_PROTOCOL_F_MAX
  79};
  80
  81/* Virtio transport features mapping */
  82static qmp_virtio_feature_map_t virtio_transport_map[] = {
  83    /* Virtio device transport features */
  84#ifndef VIRTIO_CONFIG_NO_LEGACY
  85    FEATURE_ENTRY(VIRTIO_F_NOTIFY_ON_EMPTY, \
  86            "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. "
  87            "descs. on VQ"),
  88    FEATURE_ENTRY(VIRTIO_F_ANY_LAYOUT, \
  89            "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts"),
  90#endif /* !VIRTIO_CONFIG_NO_LEGACY */
  91    FEATURE_ENTRY(VIRTIO_F_VERSION_1, \
  92            "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"),
  93    FEATURE_ENTRY(VIRTIO_F_IOMMU_PLATFORM, \
  94            "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform"),
  95    FEATURE_ENTRY(VIRTIO_F_RING_PACKED, \
  96            "VIRTIO_F_RING_PACKED: Device supports packed VQ layout"),
  97    FEATURE_ENTRY(VIRTIO_F_IN_ORDER, \
  98            "VIRTIO_F_IN_ORDER: Device uses buffers in same order as made "
  99            "available by driver"),
 100    FEATURE_ENTRY(VIRTIO_F_ORDER_PLATFORM, \
 101            "VIRTIO_F_ORDER_PLATFORM: Memory accesses ordered by platform"),
 102    FEATURE_ENTRY(VIRTIO_F_SR_IOV, \
 103            "VIRTIO_F_SR_IOV: Device supports single root I/O virtualization"),
 104    /* Virtio ring transport features */
 105    FEATURE_ENTRY(VIRTIO_RING_F_INDIRECT_DESC, \
 106            "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported"),
 107    FEATURE_ENTRY(VIRTIO_RING_F_EVENT_IDX, \
 108            "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled"),
 109    { -1, "" }
 110};
 111
 112/* Vhost-user protocol features mapping */
 113static qmp_virtio_feature_map_t vhost_user_protocol_map[] = {
 114    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_MQ, \
 115            "VHOST_USER_PROTOCOL_F_MQ: Multiqueue protocol supported"),
 116    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_LOG_SHMFD, \
 117            "VHOST_USER_PROTOCOL_F_LOG_SHMFD: Shared log memory fd supported"),
 118    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RARP, \
 119            "VHOST_USER_PROTOCOL_F_RARP: Vhost-user back-end RARP broadcasting "
 120            "supported"),
 121    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_REPLY_ACK, \
 122            "VHOST_USER_PROTOCOL_F_REPLY_ACK: Requested operation status ack. "
 123            "supported"),
 124    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU, \
 125            "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"),
 126    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ, \
 127            "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated "
 128            "requests supported"),
 129    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, \
 130            "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy "
 131            "devices supported"),
 132    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CRYPTO_SESSION, \
 133            "VHOST_USER_PROTOCOL_F_CRYPTO_SESSION: Session creation for crypto "
 134            "operations supported"),
 135    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_PAGEFAULT, \
 136            "VHOST_USER_PROTOCOL_F_PAGEFAULT: Request servicing on userfaultfd "
 137            "for accessed pages supported"),
 138    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG, \
 139            "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio "
 140            "device configuration space supported"),
 141    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD, \
 142            "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication "
 143            "channel supported"),
 144    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \
 145            "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified "
 146            "VQs supported"),
 147    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, \
 148            "VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: Shared inflight I/O buffers "
 149            "supported"),
 150    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RESET_DEVICE, \
 151            "VHOST_USER_PROTOCOL_F_RESET_DEVICE: Disabling all rings and "
 152            "resetting internal device state supported"),
 153    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS, \
 154            "VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS: In-band messaging "
 155            "supported"),
 156    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, \
 157            "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for "
 158            "memory slots supported"),
 159    { -1, "" }
 160};
 161
 162/* virtio device configuration statuses */
 163static qmp_virtio_feature_map_t virtio_config_status_map[] = {
 164    FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER_OK, \
 165            "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"),
 166    FEATURE_ENTRY(VIRTIO_CONFIG_S_FEATURES_OK, \
 167            "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete"),
 168    FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER, \
 169            "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device"),
 170    FEATURE_ENTRY(VIRTIO_CONFIG_S_NEEDS_RESET, \
 171            "VIRTIO_CONFIG_S_NEEDS_RESET: Irrecoverable error, device needs "
 172            "reset"),
 173    FEATURE_ENTRY(VIRTIO_CONFIG_S_FAILED, \
 174            "VIRTIO_CONFIG_S_FAILED: Error in guest, device failed"),
 175    FEATURE_ENTRY(VIRTIO_CONFIG_S_ACKNOWLEDGE, \
 176            "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found"),
 177    { -1, "" }
 178};
 179
 180/* virtio-blk features mapping */
 181qmp_virtio_feature_map_t virtio_blk_feature_map[] = {
 182    FEATURE_ENTRY(VIRTIO_BLK_F_SIZE_MAX, \
 183            "VIRTIO_BLK_F_SIZE_MAX: Max segment size is size_max"),
 184    FEATURE_ENTRY(VIRTIO_BLK_F_SEG_MAX, \
 185            "VIRTIO_BLK_F_SEG_MAX: Max segments in a request is seg_max"),
 186    FEATURE_ENTRY(VIRTIO_BLK_F_GEOMETRY, \
 187            "VIRTIO_BLK_F_GEOMETRY: Legacy geometry available"),
 188    FEATURE_ENTRY(VIRTIO_BLK_F_RO, \
 189            "VIRTIO_BLK_F_RO: Device is read-only"),
 190    FEATURE_ENTRY(VIRTIO_BLK_F_BLK_SIZE, \
 191            "VIRTIO_BLK_F_BLK_SIZE: Block size of disk available"),
 192    FEATURE_ENTRY(VIRTIO_BLK_F_TOPOLOGY, \
 193            "VIRTIO_BLK_F_TOPOLOGY: Topology information available"),
 194    FEATURE_ENTRY(VIRTIO_BLK_F_MQ, \
 195            "VIRTIO_BLK_F_MQ: Multiqueue supported"),
 196    FEATURE_ENTRY(VIRTIO_BLK_F_DISCARD, \
 197            "VIRTIO_BLK_F_DISCARD: Discard command supported"),
 198    FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \
 199            "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"),
 200#ifndef VIRTIO_BLK_NO_LEGACY
 201    FEATURE_ENTRY(VIRTIO_BLK_F_BARRIER, \
 202            "VIRTIO_BLK_F_BARRIER: Request barriers supported"),
 203    FEATURE_ENTRY(VIRTIO_BLK_F_SCSI, \
 204            "VIRTIO_BLK_F_SCSI: SCSI packet commands supported"),
 205    FEATURE_ENTRY(VIRTIO_BLK_F_FLUSH, \
 206            "VIRTIO_BLK_F_FLUSH: Flush command supported"),
 207    FEATURE_ENTRY(VIRTIO_BLK_F_CONFIG_WCE, \
 208            "VIRTIO_BLK_F_CONFIG_WCE: Cache writeback and writethrough modes "
 209            "supported"),
 210#endif /* !VIRTIO_BLK_NO_LEGACY */
 211    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 212            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 213    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 214            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 215            "negotiation supported"),
 216    { -1, "" }
 217};
 218
 219/* virtio-serial features mapping */
 220qmp_virtio_feature_map_t virtio_serial_feature_map[] = {
 221    FEATURE_ENTRY(VIRTIO_CONSOLE_F_SIZE, \
 222            "VIRTIO_CONSOLE_F_SIZE: Host providing console size"),
 223    FEATURE_ENTRY(VIRTIO_CONSOLE_F_MULTIPORT, \
 224            "VIRTIO_CONSOLE_F_MULTIPORT: Multiple ports for device supported"),
 225    FEATURE_ENTRY(VIRTIO_CONSOLE_F_EMERG_WRITE, \
 226            "VIRTIO_CONSOLE_F_EMERG_WRITE: Emergency write supported"),
 227    { -1, "" }
 228};
 229
 230/* virtio-gpu features mapping */
 231qmp_virtio_feature_map_t virtio_gpu_feature_map[] = {
 232    FEATURE_ENTRY(VIRTIO_GPU_F_VIRGL, \
 233            "VIRTIO_GPU_F_VIRGL: Virgl 3D mode supported"),
 234    FEATURE_ENTRY(VIRTIO_GPU_F_EDID, \
 235            "VIRTIO_GPU_F_EDID: EDID metadata supported"),
 236    FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_UUID, \
 237            "VIRTIO_GPU_F_RESOURCE_UUID: Resource UUID assigning supported"),
 238    FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_BLOB, \
 239            "VIRTIO_GPU_F_RESOURCE_BLOB: Size-based blob resources supported"),
 240    FEATURE_ENTRY(VIRTIO_GPU_F_CONTEXT_INIT, \
 241            "VIRTIO_GPU_F_CONTEXT_INIT: Context types and synchronization "
 242            "timelines supported"),
 243    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 244            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 245    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 246            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 247            "negotiation supported"),
 248    { -1, "" }
 249};
 250
 251/* virtio-input features mapping */
 252qmp_virtio_feature_map_t virtio_input_feature_map[] = {
 253    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 254            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 255    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 256            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 257            "negotiation supported"),
 258    { -1, "" }
 259};
 260
 261/* virtio-net features mapping */
 262qmp_virtio_feature_map_t virtio_net_feature_map[] = {
 263    FEATURE_ENTRY(VIRTIO_NET_F_CSUM, \
 264            "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum "
 265            "supported"),
 266    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_CSUM, \
 267            "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial "
 268            "checksum supported"),
 269    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
 270            "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading "
 271            "reconfig. supported"),
 272    FEATURE_ENTRY(VIRTIO_NET_F_MTU, \
 273            "VIRTIO_NET_F_MTU: Device max MTU reporting supported"),
 274    FEATURE_ENTRY(VIRTIO_NET_F_MAC, \
 275            "VIRTIO_NET_F_MAC: Device has given MAC address"),
 276    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO4, \
 277            "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4"),
 278    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO6, \
 279            "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6"),
 280    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ECN, \
 281            "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN"),
 282    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UFO, \
 283            "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO"),
 284    FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO4, \
 285            "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4"),
 286    FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO6, \
 287            "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6"),
 288    FEATURE_ENTRY(VIRTIO_NET_F_HOST_ECN, \
 289            "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN"),
 290    FEATURE_ENTRY(VIRTIO_NET_F_HOST_UFO, \
 291            "VIRTIO_NET_F_HOST_UFO: Device can receive UFO"),
 292    FEATURE_ENTRY(VIRTIO_NET_F_MRG_RXBUF, \
 293            "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"),
 294    FEATURE_ENTRY(VIRTIO_NET_F_STATUS, \
 295            "VIRTIO_NET_F_STATUS: Configuration status field available"),
 296    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VQ, \
 297            "VIRTIO_NET_F_CTRL_VQ: Control channel available"),
 298    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX, \
 299            "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported"),
 300    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VLAN, \
 301            "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported"),
 302    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX_EXTRA, \
 303            "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported"),
 304    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ANNOUNCE, \
 305            "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets "
 306            "supported"),
 307    FEATURE_ENTRY(VIRTIO_NET_F_MQ, \
 308            "VIRTIO_NET_F_MQ: Multiqueue with automatic receive steering "
 309            "supported"),
 310    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR, \
 311            "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control "
 312            "channel"),
 313    FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT, \
 314            "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"),
 315    FEATURE_ENTRY(VIRTIO_NET_F_RSS, \
 316            "VIRTIO_NET_F_RSS: RSS RX steering supported"),
 317    FEATURE_ENTRY(VIRTIO_NET_F_RSC_EXT, \
 318            "VIRTIO_NET_F_RSC_EXT: Extended coalescing info supported"),
 319    FEATURE_ENTRY(VIRTIO_NET_F_STANDBY, \
 320            "VIRTIO_NET_F_STANDBY: Device acting as standby for primary "
 321            "device with same MAC addr. supported"),
 322    FEATURE_ENTRY(VIRTIO_NET_F_SPEED_DUPLEX, \
 323            "VIRTIO_NET_F_SPEED_DUPLEX: Device set linkspeed and duplex"),
 324#ifndef VIRTIO_NET_NO_LEGACY
 325    FEATURE_ENTRY(VIRTIO_NET_F_GSO, \
 326            "VIRTIO_NET_F_GSO: Handling GSO-type packets supported"),
 327#endif /* !VIRTIO_NET_NO_LEGACY */
 328    FEATURE_ENTRY(VHOST_NET_F_VIRTIO_NET_HDR, \
 329            "VHOST_NET_F_VIRTIO_NET_HDR: Virtio-net headers for RX and TX "
 330            "packets supported"),
 331    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 332            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 333    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 334            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 335            "negotiation supported"),
 336    { -1, "" }
 337};
 338
 339/* virtio-scsi features mapping */
 340qmp_virtio_feature_map_t virtio_scsi_feature_map[] = {
 341    FEATURE_ENTRY(VIRTIO_SCSI_F_INOUT, \
 342            "VIRTIO_SCSI_F_INOUT: Requests including read and writable data "
  343            "buffers supported"),
 344    FEATURE_ENTRY(VIRTIO_SCSI_F_HOTPLUG, \
 345            "VIRTIO_SCSI_F_HOTPLUG: Reporting and handling hot-plug events "
 346            "supported"),
 347    FEATURE_ENTRY(VIRTIO_SCSI_F_CHANGE, \
 348            "VIRTIO_SCSI_F_CHANGE: Reporting and handling LUN changes "
 349            "supported"),
 350    FEATURE_ENTRY(VIRTIO_SCSI_F_T10_PI, \
 351            "VIRTIO_SCSI_F_T10_PI: T10 info included in request header"),
 352    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 353            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 354    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 355            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 356            "negotiation supported"),
 357    { -1, "" }
 358};
 359
 360/* virtio/vhost-user-fs features mapping */
 361qmp_virtio_feature_map_t virtio_fs_feature_map[] = {
 362    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 363            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 364    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 365            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 366            "negotiation supported"),
 367    { -1, "" }
 368};
 369
 370/* virtio/vhost-user-i2c features mapping */
 371qmp_virtio_feature_map_t virtio_i2c_feature_map[] = {
 372    FEATURE_ENTRY(VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, \
  373            "VIRTIO_I2C_F_ZERO_LENGTH_REQUEST: Zero length requests supported"),
 374    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 375            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 376    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 377            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 378            "negotiation supported"),
 379    { -1, "" }
 380};
 381
 382/* virtio/vhost-vsock features mapping */
 383qmp_virtio_feature_map_t virtio_vsock_feature_map[] = {
 384    FEATURE_ENTRY(VIRTIO_VSOCK_F_SEQPACKET, \
 385            "VIRTIO_VSOCK_F_SEQPACKET: SOCK_SEQPACKET supported"),
 386    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 387            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 388    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 389            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 390            "negotiation supported"),
 391    { -1, "" }
 392};
 393
 394/* virtio-balloon features mapping */
 395qmp_virtio_feature_map_t virtio_balloon_feature_map[] = {
 396    FEATURE_ENTRY(VIRTIO_BALLOON_F_MUST_TELL_HOST, \
 397            "VIRTIO_BALLOON_F_MUST_TELL_HOST: Tell host before reclaiming "
 398            "pages"),
 399    FEATURE_ENTRY(VIRTIO_BALLOON_F_STATS_VQ, \
 400            "VIRTIO_BALLOON_F_STATS_VQ: Guest memory stats VQ available"),
 401    FEATURE_ENTRY(VIRTIO_BALLOON_F_DEFLATE_ON_OOM, \
 402            "VIRTIO_BALLOON_F_DEFLATE_ON_OOM: Deflate balloon when guest OOM"),
 403    FEATURE_ENTRY(VIRTIO_BALLOON_F_FREE_PAGE_HINT, \
 404            "VIRTIO_BALLOON_F_FREE_PAGE_HINT: VQ reporting free pages enabled"),
 405    FEATURE_ENTRY(VIRTIO_BALLOON_F_PAGE_POISON, \
 406            "VIRTIO_BALLOON_F_PAGE_POISON: Guest page poisoning enabled"),
 407    FEATURE_ENTRY(VIRTIO_BALLOON_F_REPORTING, \
 408            "VIRTIO_BALLOON_F_REPORTING: Page reporting VQ enabled"),
 409    { -1, "" }
 410};
 411
 412/* virtio-crypto features mapping */
 413qmp_virtio_feature_map_t virtio_crypto_feature_map[] = {
 414    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 415            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 416    { -1, "" }
 417};
 418
 419/* virtio-iommu features mapping */
 420qmp_virtio_feature_map_t virtio_iommu_feature_map[] = {
 421    FEATURE_ENTRY(VIRTIO_IOMMU_F_INPUT_RANGE, \
 422            "VIRTIO_IOMMU_F_INPUT_RANGE: Range of available virtual addrs. "
 423            "available"),
 424    FEATURE_ENTRY(VIRTIO_IOMMU_F_DOMAIN_RANGE, \
 425            "VIRTIO_IOMMU_F_DOMAIN_RANGE: Number of supported domains "
 426            "available"),
 427    FEATURE_ENTRY(VIRTIO_IOMMU_F_MAP_UNMAP, \
 428            "VIRTIO_IOMMU_F_MAP_UNMAP: Map and unmap requests available"),
 429    FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS, \
 430            "VIRTIO_IOMMU_F_BYPASS: Endpoints not attached to domains are in "
 431            "bypass mode"),
 432    FEATURE_ENTRY(VIRTIO_IOMMU_F_PROBE, \
 433            "VIRTIO_IOMMU_F_PROBE: Probe requests available"),
 434    FEATURE_ENTRY(VIRTIO_IOMMU_F_MMIO, \
 435            "VIRTIO_IOMMU_F_MMIO: VIRTIO_IOMMU_MAP_F_MMIO flag available"),
 436    FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS_CONFIG, \
 437            "VIRTIO_IOMMU_F_BYPASS_CONFIG: Bypass field of IOMMU config "
 438            "available"),
 439    { -1, "" }
 440};
 441
 442/* virtio-mem features mapping */
 443qmp_virtio_feature_map_t virtio_mem_feature_map[] = {
 444#ifndef CONFIG_ACPI
 445    FEATURE_ENTRY(VIRTIO_MEM_F_ACPI_PXM, \
 446            "VIRTIO_MEM_F_ACPI_PXM: node_id is an ACPI PXM and is valid"),
 447#endif /* !CONFIG_ACPI */
 448    FEATURE_ENTRY(VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, \
 449            "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: Unplugged memory cannot be "
 450            "accessed"),
 451    { -1, "" }
 452};
 453
 454/* virtio-rng features mapping */
 455qmp_virtio_feature_map_t virtio_rng_feature_map[] = {
 456    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 457            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 458    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 459            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 460            "negotiation supported"),
 461    { -1, "" }
 462};
 463
 464/*
 465 * The alignment to use between consumer and producer parts of vring.
 466 * x86 pagesize again. This is the default, used by transports like PCI
 467 * which don't provide a means for the guest to tell the host the alignment.
 468 */
 469#define VIRTIO_PCI_VRING_ALIGN         4096
 470
 471typedef struct VRingDesc
 472{
 473    uint64_t addr;
 474    uint32_t len;
 475    uint16_t flags;
 476    uint16_t next;
 477} VRingDesc;
 478
 479typedef struct VRingPackedDesc {
 480    uint64_t addr;
 481    uint32_t len;
 482    uint16_t id;
 483    uint16_t flags;
 484} VRingPackedDesc;
 485
 486typedef struct VRingAvail
 487{
 488    uint16_t flags;
 489    uint16_t idx;
 490    uint16_t ring[];
 491} VRingAvail;
 492
 493typedef struct VRingUsedElem
 494{
 495    uint32_t id;
 496    uint32_t len;
 497} VRingUsedElem;
 498
 499typedef struct VRingUsed
 500{
 501    uint16_t flags;
 502    uint16_t idx;
 503    VRingUsedElem ring[];
 504} VRingUsed;
 505
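/*
 * RCU-managed set of MemoryRegionCaches for one virtqueue: readers fetch it
 * through vring_get_region_caches() under rcu_read_lock(), writers publish a
 * new set from virtio_init_region_cache() and free the old one via call_rcu().
 */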
 506typedef struct VRingMemoryRegionCaches {
 507    struct rcu_head rcu;
 508    MemoryRegionCache desc;
 509    MemoryRegionCache avail;
 510    MemoryRegionCache used;
 511} VRingMemoryRegionCaches;
 512
 513typedef struct VRing
 514{
 515    unsigned int num;
 516    unsigned int num_default;
 517    unsigned int align;
 518    hwaddr desc;
 519    hwaddr avail;
 520    hwaddr used;
 521    VRingMemoryRegionCaches *caches;
 522} VRing;
 523
 524typedef struct VRingPackedDescEvent {
 525    uint16_t off_wrap;
 526    uint16_t flags;
  527} VRingPackedDescEvent;
 528
 529struct VirtQueue
 530{
 531    VRing vring;
 532    VirtQueueElement *used_elems;
 533
 534    /* Next head to pop */
 535    uint16_t last_avail_idx;
 536    bool last_avail_wrap_counter;
 537
 538    /* Last avail_idx read from VQ. */
 539    uint16_t shadow_avail_idx;
 540    bool shadow_avail_wrap_counter;
 541
 542    uint16_t used_idx;
 543    bool used_wrap_counter;
 544
 545    /* Last used index value we have signalled on */
 546    uint16_t signalled_used;
 547
  548    /* Whether signalled_used is valid */
 549    bool signalled_used_valid;
 550
 551    /* Notification enabled? */
 552    bool notification;
 553
 554    uint16_t queue_index;
 555
 556    unsigned int inuse;
 557
 558    uint16_t vector;
 559    VirtIOHandleOutput handle_output;
 560    VirtIODevice *vdev;
 561    EventNotifier guest_notifier;
 562    EventNotifier host_notifier;
 563    bool host_notifier_enabled;
 564    QLIST_ENTRY(VirtQueue) node;
 565};
 566
 567const char *virtio_device_names[] = {
 568    [VIRTIO_ID_NET] = "virtio-net",
 569    [VIRTIO_ID_BLOCK] = "virtio-blk",
 570    [VIRTIO_ID_CONSOLE] = "virtio-serial",
 571    [VIRTIO_ID_RNG] = "virtio-rng",
 572    [VIRTIO_ID_BALLOON] = "virtio-balloon",
 573    [VIRTIO_ID_IOMEM] = "virtio-iomem",
 574    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
 575    [VIRTIO_ID_SCSI] = "virtio-scsi",
 576    [VIRTIO_ID_9P] = "virtio-9p",
 577    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
 578    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
 579    [VIRTIO_ID_CAIF] = "virtio-caif",
 580    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
 581    [VIRTIO_ID_GPU] = "virtio-gpu",
 582    [VIRTIO_ID_CLOCK] = "virtio-clk",
 583    [VIRTIO_ID_INPUT] = "virtio-input",
 584    [VIRTIO_ID_VSOCK] = "vhost-vsock",
 585    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
 586    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
 587    [VIRTIO_ID_PSTORE] = "virtio-pstore",
 588    [VIRTIO_ID_IOMMU] = "virtio-iommu",
 589    [VIRTIO_ID_MEM] = "virtio-mem",
 590    [VIRTIO_ID_SOUND] = "virtio-sound",
 591    [VIRTIO_ID_FS] = "virtio-user-fs",
 592    [VIRTIO_ID_PMEM] = "virtio-pmem",
 593    [VIRTIO_ID_RPMB] = "virtio-rpmb",
 594    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
 595    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
 596    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
 597    [VIRTIO_ID_SCMI] = "virtio-scmi",
 598    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
 599    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
 600    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
 601    [VIRTIO_ID_CAN] = "virtio-can",
 602    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
 603    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
 604    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
 605    [VIRTIO_ID_BT] = "virtio-bluetooth",
 606    [VIRTIO_ID_GPIO] = "virtio-gpio"
 607};
 608
 609static const char *virtio_id_to_name(uint16_t device_id)
 610{
 611    assert(device_id < G_N_ELEMENTS(virtio_device_names));
 612    const char *name = virtio_device_names[device_id];
 613    assert(name != NULL);
 614    return name;
 615}
 616
 617/* Called within call_rcu().  */
 618static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 619{
 620    assert(caches != NULL);
 621    address_space_cache_destroy(&caches->desc);
 622    address_space_cache_destroy(&caches->avail);
 623    address_space_cache_destroy(&caches->used);
 624    g_free(caches);
 625}
 626
 627static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 628{
 629    VRingMemoryRegionCaches *caches;
 630
 631    caches = qatomic_read(&vq->vring.caches);
 632    qatomic_rcu_set(&vq->vring.caches, NULL);
 633    if (caches) {
 634        call_rcu(caches, virtio_free_region_cache, rcu);
 635    }
 636}
 637
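/*
 * (Re)build the MemoryRegionCaches for virtqueue @n: map the desc, used and
 * avail rings through the device's DMA address space and publish the new
 * caches with RCU.  If any mapping comes up short, the new caches are torn
 * down and the queue is left without caches.
 */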
 638static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 639{
 640    VirtQueue *vq = &vdev->vq[n];
 641    VRingMemoryRegionCaches *old = vq->vring.caches;
 642    VRingMemoryRegionCaches *new = NULL;
 643    hwaddr addr, size;
 644    int64_t len;
 645    bool packed;
 646
 647
 648    addr = vq->vring.desc;
 649    if (!addr) {
 650        goto out_no_cache;
 651    }
 652    new = g_new0(VRingMemoryRegionCaches, 1);
 653    size = virtio_queue_get_desc_size(vdev, n);
 654    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
 655                                   true : false;
 656    len = address_space_cache_init(&new->desc, vdev->dma_as,
 657                                   addr, size, packed);
 658    if (len < size) {
 659        virtio_error(vdev, "Cannot map desc");
 660        goto err_desc;
 661    }
 662
 663    size = virtio_queue_get_used_size(vdev, n);
 664    len = address_space_cache_init(&new->used, vdev->dma_as,
 665                                   vq->vring.used, size, true);
 666    if (len < size) {
 667        virtio_error(vdev, "Cannot map used");
 668        goto err_used;
 669    }
 670
 671    size = virtio_queue_get_avail_size(vdev, n);
 672    len = address_space_cache_init(&new->avail, vdev->dma_as,
 673                                   vq->vring.avail, size, false);
 674    if (len < size) {
 675        virtio_error(vdev, "Cannot map avail");
 676        goto err_avail;
 677    }
 678
 679    qatomic_rcu_set(&vq->vring.caches, new);
 680    if (old) {
 681        call_rcu(old, virtio_free_region_cache, rcu);
 682    }
 683    return;
 684
 685err_avail:
 686    address_space_cache_destroy(&new->avail);
 687err_used:
 688    address_space_cache_destroy(&new->used);
 689err_desc:
 690    address_space_cache_destroy(&new->desc);
 691out_no_cache:
 692    g_free(new);
 693    virtio_virtqueue_reset_region_cache(vq);
 694}
 695
 696/* virt queue functions */
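/*
 * Split-ring layout, as computed by virtio_queue_update_rings() below: the
 * avail ring starts immediately after the num descriptors and the used ring
 * starts at the next vring->align boundary (vring_align() rounds up).  For
 * example, with num = 256 and align = 4096:
 *
 *   desc  = base
 *   avail = base + 256 * sizeof(VRingDesc)                  (= base + 0x1000)
 *   used  = align_up(avail + offsetof(VRingAvail, ring[256]), 4096)
 */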
 697void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 698{
 699    VRing *vring = &vdev->vq[n].vring;
 700
 701    if (!vring->num || !vring->desc || !vring->align) {
 702        /* not yet setup -> nothing to do */
 703        return;
 704    }
 705    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 706    vring->used = vring_align(vring->avail +
 707                              offsetof(VRingAvail, ring[vring->num]),
 708                              vring->align);
 709    virtio_init_region_cache(vdev, n);
 710}
 711
 712/* Called within rcu_read_lock().  */
 713static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 714                                  MemoryRegionCache *cache, int i)
 715{
 716    address_space_read_cached(cache, i * sizeof(VRingDesc),
 717                              desc, sizeof(VRingDesc));
 718    virtio_tswap64s(vdev, &desc->addr);
 719    virtio_tswap32s(vdev, &desc->len);
 720    virtio_tswap16s(vdev, &desc->flags);
 721    virtio_tswap16s(vdev, &desc->next);
 722}
 723
 724static void vring_packed_event_read(VirtIODevice *vdev,
 725                                    MemoryRegionCache *cache,
 726                                    VRingPackedDescEvent *e)
 727{
 728    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 729    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 730
 731    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
 732    /* Make sure flags is seen before off_wrap */
 733    smp_rmb();
 734    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
 735    virtio_tswap16s(vdev, &e->flags);
 736}
 737
 738static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 739                                        MemoryRegionCache *cache,
 740                                        uint16_t off_wrap)
 741{
 742    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 743
 744    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
 745    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 746}
 747
 748static void vring_packed_flags_write(VirtIODevice *vdev,
 749                                     MemoryRegionCache *cache, uint16_t flags)
 750{
 751    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 752
 753    virtio_stw_phys_cached(vdev, cache, off, flags);
 754    address_space_cache_invalidate(cache, off, sizeof(flags));
 755}
 756
 757/* Called within rcu_read_lock().  */
 758static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 759{
 760    return qatomic_rcu_read(&vq->vring.caches);
 761}
 762
 763/* Called within rcu_read_lock().  */
 764static inline uint16_t vring_avail_flags(VirtQueue *vq)
 765{
 766    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 767    hwaddr pa = offsetof(VRingAvail, flags);
 768
 769    if (!caches) {
 770        return 0;
 771    }
 772
 773    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 774}
 775
 776/* Called within rcu_read_lock().  */
 777static inline uint16_t vring_avail_idx(VirtQueue *vq)
 778{
 779    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 780    hwaddr pa = offsetof(VRingAvail, idx);
 781
 782    if (!caches) {
 783        return 0;
 784    }
 785
 786    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 787    return vq->shadow_avail_idx;
 788}
 789
 790/* Called within rcu_read_lock().  */
 791static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 792{
 793    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 794    hwaddr pa = offsetof(VRingAvail, ring[i]);
 795
 796    if (!caches) {
 797        return 0;
 798    }
 799
 800    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 801}
 802
 803/* Called within rcu_read_lock().  */
 804static inline uint16_t vring_get_used_event(VirtQueue *vq)
 805{
 806    return vring_avail_ring(vq, vq->vring.num);
 807}
 808
 809/* Called within rcu_read_lock().  */
 810static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 811                                    int i)
 812{
 813    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 814    hwaddr pa = offsetof(VRingUsed, ring[i]);
 815
 816    if (!caches) {
 817        return;
 818    }
 819
 820    virtio_tswap32s(vq->vdev, &uelem->id);
 821    virtio_tswap32s(vq->vdev, &uelem->len);
 822    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 823    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 824}
 825
 826/* Called within rcu_read_lock(). */
 827static inline uint16_t vring_used_flags(VirtQueue *vq)
 828{
 829    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 830    hwaddr pa = offsetof(VRingUsed, flags);
 831
 832    if (!caches) {
 833        return 0;
 834    }
 835
 836    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 837}
 838
 839/* Called within rcu_read_lock().  */
 840static uint16_t vring_used_idx(VirtQueue *vq)
 841{
 842    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 843    hwaddr pa = offsetof(VRingUsed, idx);
 844
 845    if (!caches) {
 846        return 0;
 847    }
 848
 849    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 850}
 851
 852/* Called within rcu_read_lock().  */
 853static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 854{
 855    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 856    hwaddr pa = offsetof(VRingUsed, idx);
 857
 858    if (caches) {
 859        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 860        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 861    }
 862
 863    vq->used_idx = val;
 864}
 865
 866/* Called within rcu_read_lock().  */
 867static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 868{
 869    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 870    VirtIODevice *vdev = vq->vdev;
 871    hwaddr pa = offsetof(VRingUsed, flags);
 872    uint16_t flags;
 873
 874    if (!caches) {
 875        return;
 876    }
 877
 878    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 879    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 880    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 881}
 882
 883/* Called within rcu_read_lock().  */
 884static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 885{
 886    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 887    VirtIODevice *vdev = vq->vdev;
 888    hwaddr pa = offsetof(VRingUsed, flags);
 889    uint16_t flags;
 890
 891    if (!caches) {
 892        return;
 893    }
 894
 895    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 896    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 897    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 898}
 899
 900/* Called within rcu_read_lock().  */
 901static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 902{
 903    VRingMemoryRegionCaches *caches;
 904    hwaddr pa;
 905    if (!vq->notification) {
 906        return;
 907    }
 908
 909    caches = vring_get_region_caches(vq);
 910    if (!caches) {
 911        return;
 912    }
 913
 914    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 915    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 916    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 917}
 918
 919static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 920{
 921    RCU_READ_LOCK_GUARD();
 922
 923    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 924        vring_set_avail_event(vq, vring_avail_idx(vq));
 925    } else if (enable) {
 926        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 927    } else {
 928        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 929    }
 930    if (enable) {
 931        /* Expose avail event/used flags before caller checks the avail idx. */
 932        smp_mb();
 933    }
 934}
 935
 936static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 937{
 938    uint16_t off_wrap;
 939    VRingPackedDescEvent e;
 940    VRingMemoryRegionCaches *caches;
 941
 942    RCU_READ_LOCK_GUARD();
 943    caches = vring_get_region_caches(vq);
 944    if (!caches) {
 945        return;
 946    }
 947
 948    vring_packed_event_read(vq->vdev, &caches->used, &e);
 949
 950    if (!enable) {
 951        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 952    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 953        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 954        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  955        /* Make sure off_wrap is written before flags */
 956        smp_wmb();
 957        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 958    } else {
 959        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 960    }
 961
 962    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 963    if (enable) {
 964        /* Expose avail event/used flags before caller checks the avail idx. */
 965        smp_mb();
 966    }
 967}
 968
 969bool virtio_queue_get_notification(VirtQueue *vq)
 970{
 971    return vq->notification;
 972}
 973
 974void virtio_queue_set_notification(VirtQueue *vq, int enable)
 975{
 976    vq->notification = enable;
 977
 978    if (!vq->vring.desc) {
 979        return;
 980    }
 981
 982    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 983        virtio_queue_packed_set_notification(vq, enable);
 984    } else {
 985        virtio_queue_split_set_notification(vq, enable);
 986    }
 987}
 988
 989int virtio_queue_ready(VirtQueue *vq)
 990{
 991    return vq->vring.avail != 0;
 992}
 993
 994static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 995                                         uint16_t *flags,
 996                                         MemoryRegionCache *cache,
 997                                         int i)
 998{
 999    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
1000
1001    *flags = virtio_lduw_phys_cached(vdev, cache, off);
1002}
1003
1004static void vring_packed_desc_read(VirtIODevice *vdev,
1005                                   VRingPackedDesc *desc,
1006                                   MemoryRegionCache *cache,
1007                                   int i, bool strict_order)
1008{
1009    hwaddr off = i * sizeof(VRingPackedDesc);
1010
1011    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
1012
1013    if (strict_order) {
 1014        /* Make sure flags is read before the rest of the fields. */
1015        smp_rmb();
1016    }
1017
1018    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
1019                              &desc->addr, sizeof(desc->addr));
1020    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
1021                              &desc->id, sizeof(desc->id));
1022    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
1023                              &desc->len, sizeof(desc->len));
1024    virtio_tswap64s(vdev, &desc->addr);
1025    virtio_tswap16s(vdev, &desc->id);
1026    virtio_tswap32s(vdev, &desc->len);
1027}
1028
1029static void vring_packed_desc_write_data(VirtIODevice *vdev,
1030                                         VRingPackedDesc *desc,
1031                                         MemoryRegionCache *cache,
1032                                         int i)
1033{
1034    hwaddr off_id = i * sizeof(VRingPackedDesc) +
1035                    offsetof(VRingPackedDesc, id);
1036    hwaddr off_len = i * sizeof(VRingPackedDesc) +
1037                    offsetof(VRingPackedDesc, len);
1038
1039    virtio_tswap32s(vdev, &desc->len);
1040    virtio_tswap16s(vdev, &desc->id);
1041    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
1042    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
1043    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
1044    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
1045}
1046
1047static void vring_packed_desc_write_flags(VirtIODevice *vdev,
1048                                          VRingPackedDesc *desc,
1049                                          MemoryRegionCache *cache,
1050                                          int i)
1051{
1052    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
1053
1054    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
1055    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
1056}
1057
1058static void vring_packed_desc_write(VirtIODevice *vdev,
1059                                    VRingPackedDesc *desc,
1060                                    MemoryRegionCache *cache,
1061                                    int i, bool strict_order)
1062{
1063    vring_packed_desc_write_data(vdev, desc, cache, i);
1064    if (strict_order) {
 1065        /* Make sure data is written before flags. */
1066        smp_wmb();
1067    }
1068    vring_packed_desc_write_flags(vdev, desc, cache, i);
1069}
1070
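/*
 * Packed-ring availability check: a descriptor belongs to the driver's
 * current batch when its AVAIL and USED flag bits differ and AVAIL matches
 * the wrap counter the caller expects.
 */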
1071static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
1072{
1073    bool avail, used;
1074
1075    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1076    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1077    return (avail != used) && (avail == wrap_counter);
1078}
1079
1080/* Fetch avail_idx from VQ memory only when we really need to know if
1081 * guest has added some buffers.
1082 * Called within rcu_read_lock().  */
1083static int virtio_queue_empty_rcu(VirtQueue *vq)
1084{
1085    if (virtio_device_disabled(vq->vdev)) {
1086        return 1;
1087    }
1088
1089    if (unlikely(!vq->vring.avail)) {
1090        return 1;
1091    }
1092
1093    if (vq->shadow_avail_idx != vq->last_avail_idx) {
1094        return 0;
1095    }
1096
1097    return vring_avail_idx(vq) == vq->last_avail_idx;
1098}
1099
1100static int virtio_queue_split_empty(VirtQueue *vq)
1101{
1102    bool empty;
1103
1104    if (virtio_device_disabled(vq->vdev)) {
1105        return 1;
1106    }
1107
1108    if (unlikely(!vq->vring.avail)) {
1109        return 1;
1110    }
1111
1112    if (vq->shadow_avail_idx != vq->last_avail_idx) {
1113        return 0;
1114    }
1115
1116    RCU_READ_LOCK_GUARD();
1117    empty = vring_avail_idx(vq) == vq->last_avail_idx;
1118    return empty;
1119}
1120
1121/* Called within rcu_read_lock().  */
1122static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
1123{
1124    struct VRingPackedDesc desc;
1125    VRingMemoryRegionCaches *cache;
1126
1127    if (unlikely(!vq->vring.desc)) {
1128        return 1;
1129    }
1130
1131    cache = vring_get_region_caches(vq);
1132    if (!cache) {
1133        return 1;
1134    }
1135
1136    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
1137                                 vq->last_avail_idx);
1138
1139    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
1140}
1141
1142static int virtio_queue_packed_empty(VirtQueue *vq)
1143{
1144    RCU_READ_LOCK_GUARD();
1145    return virtio_queue_packed_empty_rcu(vq);
1146}
1147
1148int virtio_queue_empty(VirtQueue *vq)
1149{
1150    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1151        return virtio_queue_packed_empty(vq);
1152    } else {
1153        return virtio_queue_split_empty(vq);
1154    }
1155}
1156
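/*
 * Unmap an element's scatter-gather mappings.  Only up to @len bytes of the
 * device-writable (in_sg) buffers are accounted as actually written back;
 * device-readable (out_sg) buffers are released in full.
 */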
1157static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
1158                               unsigned int len)
1159{
1160    AddressSpace *dma_as = vq->vdev->dma_as;
1161    unsigned int offset;
1162    int i;
1163
1164    offset = 0;
1165    for (i = 0; i < elem->in_num; i++) {
1166        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
1167
1168        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
1169                         elem->in_sg[i].iov_len,
1170                         DMA_DIRECTION_FROM_DEVICE, size);
1171
1172        offset += size;
1173    }
1174
1175    for (i = 0; i < elem->out_num; i++)
1176        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
1177                         elem->out_sg[i].iov_len,
1178                         DMA_DIRECTION_TO_DEVICE,
1179                         elem->out_sg[i].iov_len);
1180}
1181
1182/* virtqueue_detach_element:
1183 * @vq: The #VirtQueue
1184 * @elem: The #VirtQueueElement
1185 * @len: number of bytes written
1186 *
1187 * Detach the element from the virtqueue.  This function is suitable for device
1188 * reset or other situations where a #VirtQueueElement is simply freed and will
1189 * not be pushed or discarded.
1190 */
1191void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
1192                              unsigned int len)
1193{
1194    vq->inuse -= elem->ndescs;
1195    virtqueue_unmap_sg(vq, elem, len);
1196}
1197
1198static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
1199{
1200    vq->last_avail_idx -= num;
1201}
1202
1203static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
1204{
1205    if (vq->last_avail_idx < num) {
1206        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
1207        vq->last_avail_wrap_counter ^= 1;
1208    } else {
1209        vq->last_avail_idx -= num;
1210    }
1211}
1212
1213/* virtqueue_unpop:
1214 * @vq: The #VirtQueue
1215 * @elem: The #VirtQueueElement
1216 * @len: number of bytes written
1217 *
1218 * Pretend the most recent element wasn't popped from the virtqueue.  The next
1219 * call to virtqueue_pop() will refetch the element.
1220 */
1221void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
1222                     unsigned int len)
1223{
1224
1225    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1226        virtqueue_packed_rewind(vq, 1);
1227    } else {
1228        virtqueue_split_rewind(vq, 1);
1229    }
1230
1231    virtqueue_detach_element(vq, elem, len);
1232}
1233
1234/* virtqueue_rewind:
1235 * @vq: The #VirtQueue
1236 * @num: Number of elements to push back
1237 *
1238 * Pretend that elements weren't popped from the virtqueue.  The next
1239 * virtqueue_pop() will refetch the oldest element.
1240 *
1241 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
1242 *
1243 * Returns: true on success, false if @num is greater than the number of in use
1244 * elements.
1245 */
1246bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
1247{
1248    if (num > vq->inuse) {
1249        return false;
1250    }
1251
1252    vq->inuse -= num;
1253    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1254        virtqueue_packed_rewind(vq, num);
1255    } else {
1256        virtqueue_split_rewind(vq, num);
1257    }
1258    return true;
1259}
1260
1261static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
1262                    unsigned int len, unsigned int idx)
1263{
1264    VRingUsedElem uelem;
1265
1266    if (unlikely(!vq->vring.used)) {
1267        return;
1268    }
1269
1270    idx = (idx + vq->used_idx) % vq->vring.num;
1271
1272    uelem.id = elem->index;
1273    uelem.len = len;
1274    vring_used_write(vq, &uelem, idx);
1275}
1276
1277static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
1278                                  unsigned int len, unsigned int idx)
1279{
1280    vq->used_elems[idx].index = elem->index;
1281    vq->used_elems[idx].len = len;
1282    vq->used_elems[idx].ndescs = elem->ndescs;
1283}
1284
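/*
 * Write one used element back into the packed descriptor ring at slot
 * used_idx + @idx (wrapping around the ring), with the AVAIL/USED flag bits
 * set according to the device's wrap counter so the driver sees it as used.
 */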
1285static void virtqueue_packed_fill_desc(VirtQueue *vq,
1286                                       const VirtQueueElement *elem,
1287                                       unsigned int idx,
1288                                       bool strict_order)
1289{
1290    uint16_t head;
1291    VRingMemoryRegionCaches *caches;
1292    VRingPackedDesc desc = {
1293        .id = elem->index,
1294        .len = elem->len,
1295    };
1296    bool wrap_counter = vq->used_wrap_counter;
1297
1298    if (unlikely(!vq->vring.desc)) {
1299        return;
1300    }
1301
1302    head = vq->used_idx + idx;
1303    if (head >= vq->vring.num) {
1304        head -= vq->vring.num;
1305        wrap_counter ^= 1;
1306    }
1307    if (wrap_counter) {
1308        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
1309        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
1310    } else {
1311        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
1312        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
1313    }
1314
1315    caches = vring_get_region_caches(vq);
1316    if (!caches) {
1317        return;
1318    }
1319
1320    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
1321}
1322
1323/* Called within rcu_read_lock().  */
1324void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
1325                    unsigned int len, unsigned int idx)
1326{
1327    trace_virtqueue_fill(vq, elem, len, idx);
1328
1329    virtqueue_unmap_sg(vq, elem, len);
1330
1331    if (virtio_device_disabled(vq->vdev)) {
1332        return;
1333    }
1334
1335    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1336        virtqueue_packed_fill(vq, elem, len, idx);
1337    } else {
1338        virtqueue_split_fill(vq, elem, len, idx);
1339    }
1340}
1341
1342/* Called within rcu_read_lock().  */
1343static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
1344{
1345    uint16_t old, new;
1346
1347    if (unlikely(!vq->vring.used)) {
1348        return;
1349    }
1350
1351    /* Make sure buffer is written before we update index. */
1352    smp_wmb();
1353    trace_virtqueue_flush(vq, count);
1354    old = vq->used_idx;
1355    new = old + count;
1356    vring_used_idx_set(vq, new);
1357    vq->inuse -= count;
1358    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
1359        vq->signalled_used_valid = false;
1360}
1361
1362static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
1363{
1364    unsigned int i, ndescs = 0;
1365
1366    if (unlikely(!vq->vring.desc)) {
1367        return;
1368    }
1369
1370    for (i = 1; i < count; i++) {
1371        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
1372        ndescs += vq->used_elems[i].ndescs;
1373    }
1374    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
1375    ndescs += vq->used_elems[0].ndescs;
1376
1377    vq->inuse -= ndescs;
1378    vq->used_idx += ndescs;
1379    if (vq->used_idx >= vq->vring.num) {
1380        vq->used_idx -= vq->vring.num;
1381        vq->used_wrap_counter ^= 1;
1382        vq->signalled_used_valid = false;
1383    }
1384}
1385
1386void virtqueue_flush(VirtQueue *vq, unsigned int count)
1387{
1388    if (virtio_device_disabled(vq->vdev)) {
1389        vq->inuse -= count;
1390        return;
1391    }
1392
1393    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1394        virtqueue_packed_flush(vq, count);
1395    } else {
1396        virtqueue_split_flush(vq, count);
1397    }
1398}
1399
1400void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
1401                    unsigned int len)
1402{
1403    RCU_READ_LOCK_GUARD();
1404    virtqueue_fill(vq, elem, len, 0);
1405    virtqueue_flush(vq, 1);
1406}
1407
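/*
 * Returns how many new buffers the guest has made available since @idx,
 * i.e. how far the avail index has moved on, or -EINVAL (via virtio_error())
 * if the distance is larger than the ring size.
 */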
1408/* Called within rcu_read_lock().  */
1409static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1410{
1411    uint16_t num_heads = vring_avail_idx(vq) - idx;
1412
1413    /* Check it isn't doing very strange things with descriptor numbers. */
1414    if (num_heads > vq->vring.num) {
1415        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1416                     idx, vq->shadow_avail_idx);
1417        return -EINVAL;
1418    }
1419    /* On success, callers read a descriptor at vq->last_avail_idx.
1420     * Make sure descriptor read does not bypass avail index read. */
1421    if (num_heads) {
1422        smp_rmb();
1423    }
1424
1425    return num_heads;
1426}
1427
1428/* Called within rcu_read_lock().  */
1429static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1430                               unsigned int *head)
1431{
1432    /* Grab the next descriptor number they're advertising, and increment
1433     * the index we've seen. */
1434    *head = vring_avail_ring(vq, idx % vq->vring.num);
1435
1436    /* If their number is silly, that's a fatal mistake. */
1437    if (*head >= vq->vring.num) {
1438        virtio_error(vq->vdev, "Guest says index %u is available", *head);
1439        return false;
1440    }
1441
1442    return true;
1443}
1444
1445enum {
1446    VIRTQUEUE_READ_DESC_ERROR = -1,
1447    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1448    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1449};
1450
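/*
 * Follow the VRING_DESC_F_NEXT link of @desc within @desc_cache: returns
 * VIRTQUEUE_READ_DESC_DONE at the end of the chain, _MORE after loading
 * descriptor *next into @desc, or _ERROR if the next index points beyond @max.
 */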
1451static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1452                                          MemoryRegionCache *desc_cache,
1453                                          unsigned int max, unsigned int *next)
1454{
1455    /* If this descriptor says it doesn't chain, we're done. */
1456    if (!(desc->flags & VRING_DESC_F_NEXT)) {
1457        return VIRTQUEUE_READ_DESC_DONE;
1458    }
1459
1460    /* Check they're not leading us off end of descriptors. */
1461    *next = desc->next;
1462    /* Make sure compiler knows to grab that: we don't want it changing! */
1463    smp_wmb();
1464
1465    if (*next >= max) {
1466        virtio_error(vdev, "Desc next is %u", *next);
1467        return VIRTQUEUE_READ_DESC_ERROR;
1468    }
1469
1470    vring_split_desc_read(vdev, desc, desc_cache, *next);
1471    return VIRTQUEUE_READ_DESC_MORE;
1472}
1473
1474/* Called within rcu_read_lock().  */
1475static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1476                            unsigned int *in_bytes, unsigned int *out_bytes,
1477                            unsigned max_in_bytes, unsigned max_out_bytes,
1478                            VRingMemoryRegionCaches *caches)
1479{
1480    VirtIODevice *vdev = vq->vdev;
1481    unsigned int max, idx;
1482    unsigned int total_bufs, in_total, out_total;
1483    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1484    int64_t len = 0;
1485    int rc;
1486
1487    idx = vq->last_avail_idx;
1488    total_bufs = in_total = out_total = 0;
1489
1490    max = vq->vring.num;
1491
1492    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1493        MemoryRegionCache *desc_cache = &caches->desc;
1494        unsigned int num_bufs;
1495        VRingDesc desc;
1496        unsigned int i;
1497
1498        num_bufs = total_bufs;
1499
1500        if (!virtqueue_get_head(vq, idx++, &i)) {
1501            goto err;
1502        }
1503
1504        vring_split_desc_read(vdev, &desc, desc_cache, i);
1505
1506        if (desc.flags & VRING_DESC_F_INDIRECT) {
1507            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1508                virtio_error(vdev, "Invalid size for indirect buffer table");
1509                goto err;
1510            }
1511
1512            /* If we've got too many, that implies a descriptor loop. */
1513            if (num_bufs >= max) {
1514                virtio_error(vdev, "Looped descriptor");
1515                goto err;
1516            }
1517
1518            /* loop over the indirect descriptor table */
1519            len = address_space_cache_init(&indirect_desc_cache,
1520                                           vdev->dma_as,
1521                                           desc.addr, desc.len, false);
1522            desc_cache = &indirect_desc_cache;
1523            if (len < desc.len) {
1524                virtio_error(vdev, "Cannot map indirect buffer");
1525                goto err;
1526            }
1527
1528            max = desc.len / sizeof(VRingDesc);
1529            num_bufs = i = 0;
1530            vring_split_desc_read(vdev, &desc, desc_cache, i);
1531        }
1532
1533        do {
1534            /* If we've got too many, that implies a descriptor loop. */
1535            if (++num_bufs > max) {
1536                virtio_error(vdev, "Looped descriptor");
1537                goto err;
1538            }
1539
1540            if (desc.flags & VRING_DESC_F_WRITE) {
1541                in_total += desc.len;
1542            } else {
1543                out_total += desc.len;
1544            }
1545            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1546                goto done;
1547            }
1548
1549            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1550        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1551
1552        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1553            goto err;
1554        }
1555
1556        if (desc_cache == &indirect_desc_cache) {
1557            address_space_cache_destroy(&indirect_desc_cache);
1558            total_bufs++;
1559        } else {
1560            total_bufs = num_bufs;
1561        }
1562    }
1563
1564    if (rc < 0) {
1565        goto err;
1566    }
1567
1568done:
1569    address_space_cache_destroy(&indirect_desc_cache);
1570    if (in_bytes) {
1571        *in_bytes = in_total;
1572    }
1573    if (out_bytes) {
1574        *out_bytes = out_total;
1575    }
1576    return;
1577
1578err:
1579    in_total = out_total = 0;
1580    goto done;
1581}
1582
1583static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1584                                           VRingPackedDesc *desc,
1585                                           MemoryRegionCache
1586                                           *desc_cache,
1587                                           unsigned int max,
1588                                           unsigned int *next,
1589                                           bool indirect)
1590{
1591    /* If this descriptor says it doesn't chain, we're done. */
1592    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1593        return VIRTQUEUE_READ_DESC_DONE;
1594    }
1595
1596    ++*next;
1597    if (*next == max) {
1598        if (indirect) {
1599            return VIRTQUEUE_READ_DESC_DONE;
1600        } else {
1601            (*next) -= vq->vring.num;
1602        }
1603    }
1604
1605    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1606    return VIRTQUEUE_READ_DESC_MORE;
1607}
1608
1609/* Called within rcu_read_lock().  */
1610static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1611                                             unsigned int *in_bytes,
1612                                             unsigned int *out_bytes,
1613                                             unsigned max_in_bytes,
1614                                             unsigned max_out_bytes,
1615                                             VRingMemoryRegionCaches *caches)
1616{
1617    VirtIODevice *vdev = vq->vdev;
1618    unsigned int max, idx;
1619    unsigned int total_bufs, in_total, out_total;
1620    MemoryRegionCache *desc_cache;
1621    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1622    int64_t len = 0;
1623    VRingPackedDesc desc;
1624    bool wrap_counter;
1625
1626    idx = vq->last_avail_idx;
1627    wrap_counter = vq->last_avail_wrap_counter;
1628    total_bufs = in_total = out_total = 0;
1629
1630    max = vq->vring.num;
1631
1632    for (;;) {
1633        unsigned int num_bufs = total_bufs;
1634        unsigned int i = idx;
1635        int rc;
1636
1637        desc_cache = &caches->desc;
1638        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1639        if (!is_desc_avail(desc.flags, wrap_counter)) {
1640            break;
1641        }
1642
1643        if (desc.flags & VRING_DESC_F_INDIRECT) {
1644            if (desc.len % sizeof(VRingPackedDesc)) {
1645                virtio_error(vdev, "Invalid size for indirect buffer table");
1646                goto err;
1647            }
1648
1649            /* If we've got too many, that implies a descriptor loop. */
1650            if (num_bufs >= max) {
1651                virtio_error(vdev, "Looped descriptor");
1652                goto err;
1653            }
1654
1655            /* loop over the indirect descriptor table */
1656            len = address_space_cache_init(&indirect_desc_cache,
1657                                           vdev->dma_as,
1658                                           desc.addr, desc.len, false);
1659            desc_cache = &indirect_desc_cache;
1660            if (len < desc.len) {
1661                virtio_error(vdev, "Cannot map indirect buffer");
1662                goto err;
1663            }
1664
1665            max = desc.len / sizeof(VRingPackedDesc);
1666            num_bufs = i = 0;
1667            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1668        }
1669
1670        do {
1671            /* If we've got too many, that implies a descriptor loop. */
1672            if (++num_bufs > max) {
1673                virtio_error(vdev, "Looped descriptor");
1674                goto err;
1675            }
1676
1677            if (desc.flags & VRING_DESC_F_WRITE) {
1678                in_total += desc.len;
1679            } else {
1680                out_total += desc.len;
1681            }
1682            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1683                goto done;
1684            }
1685
1686            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1687                                                 &i, desc_cache ==
1688                                                 &indirect_desc_cache);
1689        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1690
1691        if (desc_cache == &indirect_desc_cache) {
1692            address_space_cache_destroy(&indirect_desc_cache);
1693            total_bufs++;
1694            idx++;
1695        } else {
1696            idx += num_bufs - total_bufs;
1697            total_bufs = num_bufs;
1698        }
1699
1700        if (idx >= vq->vring.num) {
1701            idx -= vq->vring.num;
1702            wrap_counter ^= 1;
1703        }
1704    }
1705
1706    /* Record the index and wrap counter for a kick we want */
1707    vq->shadow_avail_idx = idx;
1708    vq->shadow_avail_wrap_counter = wrap_counter;
1709done:
1710    address_space_cache_destroy(&indirect_desc_cache);
1711    if (in_bytes) {
1712        *in_bytes = in_total;
1713    }
1714    if (out_bytes) {
1715        *out_bytes = out_total;
1716    }
1717    return;
1718
1719err:
1720    in_total = out_total = 0;
1721    goto done;
1722}
1723
1724void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1725                               unsigned int *out_bytes,
1726                               unsigned max_in_bytes, unsigned max_out_bytes)
1727{
1728    uint16_t desc_size;
1729    VRingMemoryRegionCaches *caches;
1730
1731    RCU_READ_LOCK_GUARD();
1732
1733    if (unlikely(!vq->vring.desc)) {
1734        goto err;
1735    }
1736
1737    caches = vring_get_region_caches(vq);
1738    if (!caches) {
1739        goto err;
1740    }
1741
1742    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1743                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1744    if (caches->desc.len < vq->vring.num * desc_size) {
1745        virtio_error(vq->vdev, "Cannot map descriptor ring");
1746        goto err;
1747    }
1748
1749    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1750        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1751                                         max_in_bytes, max_out_bytes,
1752                                         caches);
1753    } else {
1754        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1755                                        max_in_bytes, max_out_bytes,
1756                                        caches);
1757    }
1758
1759    return;
1760err:
1761    if (in_bytes) {
1762        *in_bytes = 0;
1763    }
1764    if (out_bytes) {
1765        *out_bytes = 0;
1766    }
1767}
1768
1769int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1770                          unsigned int out_bytes)
1771{
1772    unsigned int in_total, out_total;
1773
1774    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1775    return in_bytes <= in_total && out_bytes <= out_total;
1776}
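
/*
 * [Editor's illustrative sketch, not part of the original file; kept
 *  compiled out.]  virtqueue_avail_bytes() lets a device ask, without
 * popping anything, whether the guest has already queued enough buffer
 * space.  A hypothetical device that needs "resp_len" device-writable
 * bytes for a reply could check like this:
 */
#if 0 /* illustration only */
static bool my_device_can_reply(VirtQueue *vq, size_t resp_len)
{
    /* in_bytes: device-writable space; out_bytes: driver-written data */
    return virtqueue_avail_bytes(vq, resp_len, 0);
}
#endif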
1777
1778static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1779                               hwaddr *addr, struct iovec *iov,
1780                               unsigned int max_num_sg, bool is_write,
1781                               hwaddr pa, size_t sz)
1782{
1783    bool ok = false;
1784    unsigned num_sg = *p_num_sg;
1785    assert(num_sg <= max_num_sg);
1786
1787    if (!sz) {
1788        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1789        goto out;
1790    }
1791
1792    while (sz) {
1793        hwaddr len = sz;
1794
1795        if (num_sg == max_num_sg) {
1796            virtio_error(vdev, "virtio: too many write descriptors in "
1797                               "indirect table");
1798            goto out;
1799        }
1800
1801        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1802                                              is_write ?
1803                                              DMA_DIRECTION_FROM_DEVICE :
1804                                              DMA_DIRECTION_TO_DEVICE,
1805                                              MEMTXATTRS_UNSPECIFIED);
1806        if (!iov[num_sg].iov_base) {
1807            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1808            goto out;
1809        }
1810
1811        iov[num_sg].iov_len = len;
1812        addr[num_sg] = pa;
1813
1814        sz -= len;
1815        pa += len;
1816        num_sg++;
1817    }
1818    ok = true;
1819
1820out:
1821    *p_num_sg = num_sg;
1822    return ok;
1823}
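
/*
 * [Editorial note, not part of the original file.]  dma_memory_map() may
 * return a mapping that is shorter than requested, for example when the
 * guest-physical range crosses a MemoryRegion boundary.  The while (sz)
 * loop above therefore splits a single descriptor into several iovec
 * entries, which is why a chain of N descriptors can occupy more than N
 * scatter/gather slots.
 */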
1824
1825/* Only used by error code paths before we have a VirtQueueElement (therefore
1826 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1827 * yet.
1828 */
1829static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1830                                    struct iovec *iov)
1831{
1832    unsigned int i;
1833
1834    for (i = 0; i < out_num + in_num; i++) {
1835        int is_write = i >= out_num;
1836
1837        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1838        iov++;
1839    }
1840}
1841
1842static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1843                                hwaddr *addr, unsigned int num_sg,
1844                                bool is_write)
1845{
1846    unsigned int i;
1847    hwaddr len;
1848
1849    for (i = 0; i < num_sg; i++) {
1850        len = sg[i].iov_len;
1851        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1852                                        addr[i], &len, is_write ?
1853                                        DMA_DIRECTION_FROM_DEVICE :
1854                                        DMA_DIRECTION_TO_DEVICE,
1855                                        MEMTXATTRS_UNSPECIFIED);
1856        if (!sg[i].iov_base) {
1857            error_report("virtio: error trying to map MMIO memory");
1858            exit(1);
1859        }
1860        if (len != sg[i].iov_len) {
1861            error_report("virtio: unexpected memory split");
1862            exit(1);
1863        }
1864    }
1865}
1866
1867void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1868{
1869    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1870    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1871                                                                        false);
1872}
1873
1874static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1875{
1876    VirtQueueElement *elem;
1877    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1878    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1879    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1880    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1881    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1882    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1883
1884    assert(sz >= sizeof(VirtQueueElement));
1885    elem = g_malloc(out_sg_end);
1886    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1887    elem->out_num = out_num;
1888    elem->in_num = in_num;
1889    elem->in_addr = (void *)elem + in_addr_ofs;
1890    elem->out_addr = (void *)elem + out_addr_ofs;
1891    elem->in_sg = (void *)elem + in_sg_ofs;
1892    elem->out_sg = (void *)elem + out_sg_ofs;
1893    return elem;
1894}
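
/*
 * [Editorial note, not part of the original file.]  virtqueue_alloc_element()
 * places everything in one g_malloc() block so the element can be released
 * with a single g_free().  For a caller-supplied header of "sz" bytes the
 * layout is roughly:
 *
 *   [ caller struct embedding VirtQueueElement, sz bytes ]
 *   [ padding to __alignof__(hwaddr)                     ]
 *   [ in_addr[in_num]  ][ out_addr[out_num]              ]
 *   [ padding to __alignof__(struct iovec)               ]
 *   [ in_sg[in_num]    ][ out_sg[out_num]                ]
 *
 * The in_addr/out_addr/in_sg/out_sg pointers are then fixed up to point
 * into this trailing storage.
 */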
1895
1896static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1897{
1898    unsigned int i, head, max;
1899    VRingMemoryRegionCaches *caches;
1900    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1901    MemoryRegionCache *desc_cache;
1902    int64_t len;
1903    VirtIODevice *vdev = vq->vdev;
1904    VirtQueueElement *elem = NULL;
1905    unsigned out_num, in_num, elem_entries;
1906    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1907    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1908    VRingDesc desc;
1909    int rc;
1910
1911    RCU_READ_LOCK_GUARD();
1912    if (virtio_queue_empty_rcu(vq)) {
1913        goto done;
1914    }
1915    /* Needed after virtio_queue_empty(), see comment in
1916     * virtqueue_num_heads(). */
1917    smp_rmb();
1918
1919    /* When we start there are neither input nor output descriptors. */
1920    out_num = in_num = elem_entries = 0;
1921
1922    max = vq->vring.num;
1923
1924    if (vq->inuse >= vq->vring.num) {
1925        virtio_error(vdev, "Virtqueue size exceeded");
1926        goto done;
1927    }
1928
1929    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1930        goto done;
1931    }
1932
1933    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1934        vring_set_avail_event(vq, vq->last_avail_idx);
1935    }
1936
1937    i = head;
1938
1939    caches = vring_get_region_caches(vq);
1940    if (!caches) {
1941        virtio_error(vdev, "Region caches not initialized");
1942        goto done;
1943    }
1944
1945    if (caches->desc.len < max * sizeof(VRingDesc)) {
1946        virtio_error(vdev, "Cannot map descriptor ring");
1947        goto done;
1948    }
1949
1950    desc_cache = &caches->desc;
1951    vring_split_desc_read(vdev, &desc, desc_cache, i);
1952    if (desc.flags & VRING_DESC_F_INDIRECT) {
1953        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1954            virtio_error(vdev, "Invalid size for indirect buffer table");
1955            goto done;
1956        }
1957
1958        /* loop over the indirect descriptor table */
1959        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1960                                       desc.addr, desc.len, false);
1961        desc_cache = &indirect_desc_cache;
1962        if (len < desc.len) {
1963            virtio_error(vdev, "Cannot map indirect buffer");
1964            goto done;
1965        }
1966
1967        max = desc.len / sizeof(VRingDesc);
1968        i = 0;
1969        vring_split_desc_read(vdev, &desc, desc_cache, i);
1970    }
1971
1972    /* Collect all the descriptors */
1973    do {
1974        bool map_ok;
1975
1976        if (desc.flags & VRING_DESC_F_WRITE) {
1977            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1978                                        iov + out_num,
1979                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1980                                        desc.addr, desc.len);
1981        } else {
1982            if (in_num) {
1983                virtio_error(vdev, "Incorrect order for descriptors");
1984                goto err_undo_map;
1985            }
1986            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1987                                        VIRTQUEUE_MAX_SIZE, false,
1988                                        desc.addr, desc.len);
1989        }
1990        if (!map_ok) {
1991            goto err_undo_map;
1992        }
1993
1994        /* If we've got too many, that implies a descriptor loop. */
1995        if (++elem_entries > max) {
1996            virtio_error(vdev, "Looped descriptor");
1997            goto err_undo_map;
1998        }
1999
2000        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
2001    } while (rc == VIRTQUEUE_READ_DESC_MORE);
2002
2003    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
2004        goto err_undo_map;
2005    }
2006
2007    /* Now copy what we have collected and mapped */
2008    elem = virtqueue_alloc_element(sz, out_num, in_num);
2009    elem->index = head;
2010    elem->ndescs = 1;
2011    for (i = 0; i < out_num; i++) {
2012        elem->out_addr[i] = addr[i];
2013        elem->out_sg[i] = iov[i];
2014    }
2015    for (i = 0; i < in_num; i++) {
2016        elem->in_addr[i] = addr[out_num + i];
2017        elem->in_sg[i] = iov[out_num + i];
2018    }
2019
2020    vq->inuse++;
2021
2022    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
2023done:
2024    address_space_cache_destroy(&indirect_desc_cache);
2025
2026    return elem;
2027
2028err_undo_map:
2029    virtqueue_undo_map_desc(out_num, in_num, iov);
2030    goto done;
2031}
2032
2033static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
2034{
2035    unsigned int i, max;
2036    VRingMemoryRegionCaches *caches;
2037    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
2038    MemoryRegionCache *desc_cache;
2039    int64_t len;
2040    VirtIODevice *vdev = vq->vdev;
2041    VirtQueueElement *elem = NULL;
2042    unsigned out_num, in_num, elem_entries;
2043    hwaddr addr[VIRTQUEUE_MAX_SIZE];
2044    struct iovec iov[VIRTQUEUE_MAX_SIZE];
2045    VRingPackedDesc desc;
2046    uint16_t id;
2047    int rc;
2048
2049    RCU_READ_LOCK_GUARD();
2050    if (virtio_queue_packed_empty_rcu(vq)) {
2051        goto done;
2052    }
2053
2054    /* When we start there are neither input nor output descriptors. */
2055    out_num = in_num = elem_entries = 0;
2056
2057    max = vq->vring.num;
2058
2059    if (vq->inuse >= vq->vring.num) {
2060        virtio_error(vdev, "Virtqueue size exceeded");
2061        goto done;
2062    }
2063
2064    i = vq->last_avail_idx;
2065
2066    caches = vring_get_region_caches(vq);
2067    if (!caches) {
2068        virtio_error(vdev, "Region caches not initialized");
2069        goto done;
2070    }
2071
2072    if (caches->desc.len < max * sizeof(VRingDesc)) {
2073        virtio_error(vdev, "Cannot map descriptor ring");
2074        goto done;
2075    }
2076
2077    desc_cache = &caches->desc;
2078    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
2079    id = desc.id;
2080    if (desc.flags & VRING_DESC_F_INDIRECT) {
2081        if (desc.len % sizeof(VRingPackedDesc)) {
2082            virtio_error(vdev, "Invalid size for indirect buffer table");
2083            goto done;
2084        }
2085
2086        /* loop over the indirect descriptor table */
2087        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
2088                                       desc.addr, desc.len, false);
2089        desc_cache = &indirect_desc_cache;
2090        if (len < desc.len) {
2091            virtio_error(vdev, "Cannot map indirect buffer");
2092            goto done;
2093        }
2094
2095        max = desc.len / sizeof(VRingPackedDesc);
2096        i = 0;
2097        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
2098    }
2099
2100    /* Collect all the descriptors */
2101    do {
2102        bool map_ok;
2103
2104        if (desc.flags & VRING_DESC_F_WRITE) {
2105            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
2106                                        iov + out_num,
2107                                        VIRTQUEUE_MAX_SIZE - out_num, true,
2108                                        desc.addr, desc.len);
2109        } else {
2110            if (in_num) {
2111                virtio_error(vdev, "Incorrect order for descriptors");
2112                goto err_undo_map;
2113            }
2114            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
2115                                        VIRTQUEUE_MAX_SIZE, false,
2116                                        desc.addr, desc.len);
2117        }
2118        if (!map_ok) {
2119            goto err_undo_map;
2120        }
2121
2122        /* If we've got too many, that implies a descriptor loop. */
2123        if (++elem_entries > max) {
2124            virtio_error(vdev, "Looped descriptor");
2125            goto err_undo_map;
2126        }
2127
2128        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
2129                                             desc_cache ==
2130                                             &indirect_desc_cache);
2131    } while (rc == VIRTQUEUE_READ_DESC_MORE);
2132
2133    /* Now copy what we have collected and mapped */
2134    elem = virtqueue_alloc_element(sz, out_num, in_num);
2135    for (i = 0; i < out_num; i++) {
2136        elem->out_addr[i] = addr[i];
2137        elem->out_sg[i] = iov[i];
2138    }
2139    for (i = 0; i < in_num; i++) {
2140        elem->in_addr[i] = addr[out_num + i];
2141        elem->in_sg[i] = iov[out_num + i];
2142    }
2143
2144    elem->index = id;
2145    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
2146    vq->last_avail_idx += elem->ndescs;
2147    vq->inuse += elem->ndescs;
2148
2149    if (vq->last_avail_idx >= vq->vring.num) {
2150        vq->last_avail_idx -= vq->vring.num;
2151        vq->last_avail_wrap_counter ^= 1;
2152    }
2153
2154    vq->shadow_avail_idx = vq->last_avail_idx;
2155    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
2156
2157    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
2158done:
2159    address_space_cache_destroy(&indirect_desc_cache);
2160
2161    return elem;
2162
2163err_undo_map:
2164    virtqueue_undo_map_desc(out_num, in_num, iov);
2165    goto done;
2166}
2167
2168void *virtqueue_pop(VirtQueue *vq, size_t sz)
2169{
2170    if (virtio_device_disabled(vq->vdev)) {
2171        return NULL;
2172    }
2173
2174    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
2175        return virtqueue_packed_pop(vq, sz);
2176    } else {
2177        return virtqueue_split_pop(vq, sz);
2178    }
2179}
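
/*
 * [Editor's illustrative sketch, not part of the original file; kept
 *  compiled out.]  The canonical consumption pattern for virtqueue_pop():
 * a device's queue handler pops elements, reads the driver-written out_sg
 * data, fills the device-writable in_sg buffers, then pushes the element
 * back and notifies the guest.  The processing step is elided here.
 */
#if 0 /* illustration only */
static void my_device_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
        unsigned int written = 0;

        /* ... parse elem->out_sg[0..out_num), fill elem->in_sg[0..in_num),
         * and set "written" to the number of device-written bytes ... */

        virtqueue_push(vq, elem, written);
        g_free(elem);
    }
    virtio_notify(vdev, vq);
}
#endif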
2180
2181static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
2182{
2183    VRingMemoryRegionCaches *caches;
2184    MemoryRegionCache *desc_cache;
2185    unsigned int dropped = 0;
2186    VirtQueueElement elem = {};
2187    VirtIODevice *vdev = vq->vdev;
2188    VRingPackedDesc desc;
2189
2190    RCU_READ_LOCK_GUARD();
2191
2192    caches = vring_get_region_caches(vq);
2193    if (!caches) {
2194        return 0;
2195    }
2196
2197    desc_cache = &caches->desc;
2198
2199    virtio_queue_set_notification(vq, 0);
2200
2201    while (vq->inuse < vq->vring.num) {
2202        unsigned int idx = vq->last_avail_idx;
2203        /*
2204         * Works similarly to virtqueue_pop but does not map buffers
2205         * and does not allocate any memory.
2206         */
2207        vring_packed_desc_read(vdev, &desc, desc_cache,
2208                               vq->last_avail_idx, true);
2209        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
2210            break;
2211        }
2212        elem.index = desc.id;
2213        elem.ndescs = 1;
2214        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
2215                                               vq->vring.num, &idx, false)) {
2216            ++elem.ndescs;
2217        }
2218        /*
2219         * immediately push the element, nothing to unmap
2220         * as both in_num and out_num are set to 0.
2221         */
2222        virtqueue_push(vq, &elem, 0);
2223        dropped++;
2224        vq->last_avail_idx += elem.ndescs;
2225        if (vq->last_avail_idx >= vq->vring.num) {
2226            vq->last_avail_idx -= vq->vring.num;
2227            vq->last_avail_wrap_counter ^= 1;
2228        }
2229    }
2230
2231    return dropped;
2232}
2233
2234static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
2235{
2236    unsigned int dropped = 0;
2237    VirtQueueElement elem = {};
2238    VirtIODevice *vdev = vq->vdev;
2239    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2240
2241    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
2242        /* Works similarly to virtqueue_pop but does not map buffers
2243         * and does not allocate any memory */
2244        smp_rmb();
2245        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
2246            break;
2247        }
2248        vq->inuse++;
2249        vq->last_avail_idx++;
2250        if (fEventIdx) {
2251            vring_set_avail_event(vq, vq->last_avail_idx);
2252        }
2253        /* immediately push the element, nothing to unmap
2254         * as both in_num and out_num are set to 0 */
2255        virtqueue_push(vq, &elem, 0);
2256        dropped++;
2257    }
2258
2259    return dropped;
2260}
2261
2262/* virtqueue_drop_all:
2263 * @vq: The #VirtQueue
2264 * Drops all queued buffers and indicates them to the guest
2265 * as if they were completed. Useful when buffers cannot be
2266 * processed but must be returned to the guest.
2267 */
2268unsigned int virtqueue_drop_all(VirtQueue *vq)
2269{
2270    struct VirtIODevice *vdev = vq->vdev;
2271
2272    if (virtio_device_disabled(vq->vdev)) {
2273        return 0;
2274    }
2275
2276    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2277        return virtqueue_packed_drop_all(vq);
2278    } else {
2279        return virtqueue_split_drop_all(vq);
2280    }
2281}
2282
2283/* Reading and writing a structure directly to QEMUFile is *awful*, but
2284 * it is what QEMU has always done by mistake.  We can change it sooner
2285 * or later by bumping the version number of the affected vm states.
2286 * In the meantime, since the in-memory layout of VirtQueueElement
2287 * has changed, we need to marshal to and from the layout that was
2288 * used before the change.
2289 */
2290typedef struct VirtQueueElementOld {
2291    unsigned int index;
2292    unsigned int out_num;
2293    unsigned int in_num;
2294    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
2295    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
2296    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
2297    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
2298} VirtQueueElementOld;
2299
2300void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
2301{
2302    VirtQueueElement *elem;
2303    VirtQueueElementOld data;
2304    int i;
2305
2306    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2307
2308    /* TODO: teach all callers that this can fail, and return failure instead
2309     * of asserting here.
2310     * This is just one thing (there are probably more) that must be
2311     * fixed before we can allow NDEBUG compilation.
2312     */
2313    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
2314    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
2315
2316    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
2317    elem->index = data.index;
2318
2319    for (i = 0; i < elem->in_num; i++) {
2320        elem->in_addr[i] = data.in_addr[i];
2321    }
2322
2323    for (i = 0; i < elem->out_num; i++) {
2324        elem->out_addr[i] = data.out_addr[i];
2325    }
2326
2327    for (i = 0; i < elem->in_num; i++) {
2328        /* Base is overwritten by virtqueue_map.  */
2329        elem->in_sg[i].iov_base = 0;
2330        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
2331    }
2332
2333    for (i = 0; i < elem->out_num; i++) {
2334        /* Base is overwritten by virtqueue_map.  */
2335        elem->out_sg[i].iov_base = 0;
2336        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
2337    }
2338
2339    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2340        qemu_get_be32s(f, &elem->ndescs);
2341    }
2342
2343    virtqueue_map(vdev, elem);
2344    return elem;
2345}
2346
2347void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
2348                                VirtQueueElement *elem)
2349{
2350    VirtQueueElementOld data;
2351    int i;
2352
2353    memset(&data, 0, sizeof(data));
2354    data.index = elem->index;
2355    data.in_num = elem->in_num;
2356    data.out_num = elem->out_num;
2357
2358    for (i = 0; i < elem->in_num; i++) {
2359        data.in_addr[i] = elem->in_addr[i];
2360    }
2361
2362    for (i = 0; i < elem->out_num; i++) {
2363        data.out_addr[i] = elem->out_addr[i];
2364    }
2365
2366    for (i = 0; i < elem->in_num; i++) {
2367        /* Base is overwritten by virtqueue_map when loading.  Do not
2368         * save it, as it would leak the QEMU address space layout.  */
2369        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
2370    }
2371
2372    for (i = 0; i < elem->out_num; i++) {
2373        /* Do not save iov_base as above.  */
2374        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
2375    }
2376
2377    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2378        qemu_put_be32s(f, &elem->ndescs);
2379    }
2380
2381    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2382}
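
/*
 * [Editor's illustrative sketch, not part of the original file; kept
 *  compiled out.]  qemu_get_virtqueue_element()/qemu_put_virtqueue_element()
 * are meant to be used as a symmetric pair by a device's migration code for
 * in-flight requests.  "MyReq" and the surrounding save/load framing are
 * assumptions made up for this example.
 */
#if 0 /* illustration only */
typedef struct MyReq {
    VirtQueueElement elem;  /* must be the first field */
    uint32_t status;
} MyReq;

static void my_device_save_inflight(VirtIODevice *vdev, QEMUFile *f,
                                    MyReq **reqs, uint32_t n)
{
    uint32_t i;

    qemu_put_be32(f, n);
    for (i = 0; i < n; i++) {
        qemu_put_virtqueue_element(vdev, f, &reqs[i]->elem);
    }
}

static MyReq *my_device_load_one(VirtIODevice *vdev, QEMUFile *f)
{
    /* Allocates sizeof(MyReq) and rebuilds/remaps the embedded element. */
    return qemu_get_virtqueue_element(vdev, f, sizeof(MyReq));
}
#endif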
2383
2384/* virtio device */
2385static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
2386{
2387    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2388    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2389
2390    if (virtio_device_disabled(vdev)) {
2391        return;
2392    }
2393
2394    if (k->notify) {
2395        k->notify(qbus->parent, vector);
2396    }
2397}
2398
2399void virtio_update_irq(VirtIODevice *vdev)
2400{
2401    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2402}
2403
2404static int virtio_validate_features(VirtIODevice *vdev)
2405{
2406    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2407
2408    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2409        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2410        return -EFAULT;
2411    }
2412
2413    if (k->validate_features) {
2414        return k->validate_features(vdev);
2415    } else {
2416        return 0;
2417    }
2418}
2419
2420int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2421{
2422    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2423    trace_virtio_set_status(vdev, val);
2424
2425    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2426        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2427            val & VIRTIO_CONFIG_S_FEATURES_OK) {
2428            int ret = virtio_validate_features(vdev);
2429
2430            if (ret) {
2431                return ret;
2432            }
2433        }
2434    }
2435
2436    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2437        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2438        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2439    }
2440
2441    if (k->set_status) {
2442        k->set_status(vdev, val);
2443    }
2444    vdev->status = val;
2445
2446    return 0;
2447}
2448
2449static enum virtio_device_endian virtio_default_endian(void)
2450{
2451    if (target_words_bigendian()) {
2452        return VIRTIO_DEVICE_ENDIAN_BIG;
2453    } else {
2454        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2455    }
2456}
2457
2458static enum virtio_device_endian virtio_current_cpu_endian(void)
2459{
2460    if (cpu_virtio_is_big_endian(current_cpu)) {
2461        return VIRTIO_DEVICE_ENDIAN_BIG;
2462    } else {
2463        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2464    }
2465}
2466
2467static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2468{
2469    vdev->vq[i].vring.desc = 0;
2470    vdev->vq[i].vring.avail = 0;
2471    vdev->vq[i].vring.used = 0;
2472    vdev->vq[i].last_avail_idx = 0;
2473    vdev->vq[i].shadow_avail_idx = 0;
2474    vdev->vq[i].used_idx = 0;
2475    vdev->vq[i].last_avail_wrap_counter = true;
2476    vdev->vq[i].shadow_avail_wrap_counter = true;
2477    vdev->vq[i].used_wrap_counter = true;
2478    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2479    vdev->vq[i].signalled_used = 0;
2480    vdev->vq[i].signalled_used_valid = false;
2481    vdev->vq[i].notification = true;
2482    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2483    vdev->vq[i].inuse = 0;
2484    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2485}
2486
2487void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2488{
2489    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2490
2491    if (k->queue_reset) {
2492        k->queue_reset(vdev, queue_index);
2493    }
2494
2495    __virtio_queue_reset(vdev, queue_index);
2496}
2497
2498void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2499{
2500    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2501
2502    /*
2503     * TODO: Seabios is currently out of spec and triggering this error.
2504     * So this needs to be fixed in Seabios, then this can
2505     * be re-enabled for new machine types only, and also after
2506     * being converted to LOG_GUEST_ERROR.
2507     *
2508    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2509        error_report("queue_enable is only supported in devices of virtio "
2510                     "1.0 or later.");
2511    }
2512    */
2513
2514    if (k->queue_enable) {
2515        k->queue_enable(vdev, queue_index);
2516    }
2517}
2518
2519void virtio_reset(void *opaque)
2520{
2521    VirtIODevice *vdev = opaque;
2522    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2523    int i;
2524
2525    virtio_set_status(vdev, 0);
2526    if (current_cpu) {
2527        /* Guest initiated reset */
2528        vdev->device_endian = virtio_current_cpu_endian();
2529    } else {
2530        /* System reset */
2531        vdev->device_endian = virtio_default_endian();
2532    }
2533
2534    if (k->reset) {
2535        k->reset(vdev);
2536    }
2537
2538    vdev->start_on_kick = false;
2539    vdev->started = false;
2540    vdev->broken = false;
2541    vdev->guest_features = 0;
2542    vdev->queue_sel = 0;
2543    vdev->status = 0;
2544    vdev->disabled = false;
2545    qatomic_set(&vdev->isr, 0);
2546    vdev->config_vector = VIRTIO_NO_VECTOR;
2547    virtio_notify_vector(vdev, vdev->config_vector);
2548
2549    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2550        __virtio_queue_reset(vdev, i);
2551    }
2552}
2553
2554uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2555{
2556    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2557    uint8_t val;
2558
2559    if (addr + sizeof(val) > vdev->config_len) {
2560        return (uint32_t)-1;
2561    }
2562
2563    k->get_config(vdev, vdev->config);
2564
2565    val = ldub_p(vdev->config + addr);
2566    return val;
2567}
2568
2569uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2570{
2571    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2572    uint16_t val;
2573
2574    if (addr + sizeof(val) > vdev->config_len) {
2575        return (uint32_t)-1;
2576    }
2577
2578    k->get_config(vdev, vdev->config);
2579
2580    val = lduw_p(vdev->config + addr);
2581    return val;
2582}
2583
2584uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2585{
2586    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2587    uint32_t val;
2588
2589    if (addr + sizeof(val) > vdev->config_len) {
2590        return (uint32_t)-1;
2591    }
2592
2593    k->get_config(vdev, vdev->config);
2594
2595    val = ldl_p(vdev->config + addr);
2596    return val;
2597}
2598
2599void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2600{
2601    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2602    uint8_t val = data;
2603
2604    if (addr + sizeof(val) > vdev->config_len) {
2605        return;
2606    }
2607
2608    stb_p(vdev->config + addr, val);
2609
2610    if (k->set_config) {
2611        k->set_config(vdev, vdev->config);
2612    }
2613}
2614
2615void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2616{
2617    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2618    uint16_t val = data;
2619
2620    if (addr + sizeof(val) > vdev->config_len) {
2621        return;
2622    }
2623
2624    stw_p(vdev->config + addr, val);
2625
2626    if (k->set_config) {
2627        k->set_config(vdev, vdev->config);
2628    }
2629}
2630
2631void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2632{
2633    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2634    uint32_t val = data;
2635
2636    if (addr + sizeof(val) > vdev->config_len) {
2637        return;
2638    }
2639
2640    stl_p(vdev->config + addr, val);
2641
2642    if (k->set_config) {
2643        k->set_config(vdev, vdev->config);
2644    }
2645}
2646
2647uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2648{
2649    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2650    uint8_t val;
2651
2652    if (addr + sizeof(val) > vdev->config_len) {
2653        return (uint32_t)-1;
2654    }
2655
2656    k->get_config(vdev, vdev->config);
2657
2658    val = ldub_p(vdev->config + addr);
2659    return val;
2660}
2661
2662uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2663{
2664    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2665    uint16_t val;
2666
2667    if (addr + sizeof(val) > vdev->config_len) {
2668        return (uint32_t)-1;
2669    }
2670
2671    k->get_config(vdev, vdev->config);
2672
2673    val = lduw_le_p(vdev->config + addr);
2674    return val;
2675}
2676
2677uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2678{
2679    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2680    uint32_t val;
2681
2682    if (addr + sizeof(val) > vdev->config_len) {
2683        return (uint32_t)-1;
2684    }
2685
2686    k->get_config(vdev, vdev->config);
2687
2688    val = ldl_le_p(vdev->config + addr);
2689    return val;
2690}
2691
2692void virtio_config_modern_writeb(VirtIODevice *vdev,
2693                                 uint32_t addr, uint32_t data)
2694{
2695    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2696    uint8_t val = data;
2697
2698    if (addr + sizeof(val) > vdev->config_len) {
2699        return;
2700    }
2701
2702    stb_p(vdev->config + addr, val);
2703
2704    if (k->set_config) {
2705        k->set_config(vdev, vdev->config);
2706    }
2707}
2708
2709void virtio_config_modern_writew(VirtIODevice *vdev,
2710                                 uint32_t addr, uint32_t data)
2711{
2712    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2713    uint16_t val = data;
2714
2715    if (addr + sizeof(val) > vdev->config_len) {
2716        return;
2717    }
2718
2719    stw_le_p(vdev->config + addr, val);
2720
2721    if (k->set_config) {
2722        k->set_config(vdev, vdev->config);
2723    }
2724}
2725
2726void virtio_config_modern_writel(VirtIODevice *vdev,
2727                                 uint32_t addr, uint32_t data)
2728{
2729    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2730    uint32_t val = data;
2731
2732    if (addr + sizeof(val) > vdev->config_len) {
2733        return;
2734    }
2735
2736    stl_le_p(vdev->config + addr, val);
2737
2738    if (k->set_config) {
2739        k->set_config(vdev, vdev->config);
2740    }
2741}
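
/*
 * [Editor's illustrative sketch, not part of the original file; kept
 *  compiled out.]  The config accessors above all funnel guest accesses
 * through the device class's get_config/set_config callbacks.  A
 * hypothetical device exposing one 32-bit field might implement them as
 * follows; MyDeviceState and MY_DEVICE() are invented for this example.
 */
#if 0 /* illustration only */
static void my_device_get_config(VirtIODevice *vdev, uint8_t *config)
{
    MyDeviceState *s = MY_DEVICE(vdev);

    /* For a virtio-1 (modern-only) device the config space is little endian */
    stl_le_p(config, s->value);
}

static void my_device_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    MyDeviceState *s = MY_DEVICE(vdev);

    s->value = ldl_le_p(config);
}
#endif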
2742
2743void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2744{
2745    if (!vdev->vq[n].vring.num) {
2746        return;
2747    }
2748    vdev->vq[n].vring.desc = addr;
2749    virtio_queue_update_rings(vdev, n);
2750}
2751
2752hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2753{
2754    return vdev->vq[n].vring.desc;
2755}
2756
2757void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2758                            hwaddr avail, hwaddr used)
2759{
2760    if (!vdev->vq[n].vring.num) {
2761        return;
2762    }
2763    vdev->vq[n].vring.desc = desc;
2764    vdev->vq[n].vring.avail = avail;
2765    vdev->vq[n].vring.used = used;
2766    virtio_init_region_cache(vdev, n);
2767}
2768
2769void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2770{
2771    /* Don't allow guest to flip queue between existent and
2772     * nonexistent states, or to set it to an invalid size.
2773     */
2774    if (!!num != !!vdev->vq[n].vring.num ||
2775        num > VIRTQUEUE_MAX_SIZE ||
2776        num < 0) {
2777        return;
2778    }
2779    vdev->vq[n].vring.num = num;
2780}
2781
2782VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2783{
2784    return QLIST_FIRST(&vdev->vector_queues[vector]);
2785}
2786
2787VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2788{
2789    return QLIST_NEXT(vq, node);
2790}
2791
2792int virtio_queue_get_num(VirtIODevice *vdev, int n)
2793{
2794    return vdev->vq[n].vring.num;
2795}
2796
2797int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2798{
2799    return vdev->vq[n].vring.num_default;
2800}
2801
2802int virtio_get_num_queues(VirtIODevice *vdev)
2803{
2804    int i;
2805
2806    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2807        if (!virtio_queue_get_num(vdev, i)) {
2808            break;
2809        }
2810    }
2811
2812    return i;
2813}
2814
2815void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2816{
2817    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2818    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2819
2820    /* virtio-1 compliant devices cannot change the alignment */
2821    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2822        error_report("tried to modify queue alignment for virtio-1 device");
2823        return;
2824    }
2825    /* Check that the transport told us it was going to do this
2826     * (so a buggy transport will immediately assert rather than
2827     * silently failing to migrate this state)
2828     */
2829    assert(k->has_variable_vring_alignment);
2830
2831    if (align) {
2832        vdev->vq[n].vring.align = align;
2833        virtio_queue_update_rings(vdev, n);
2834    }
2835}
2836
2837static void virtio_queue_notify_vq(VirtQueue *vq)
2838{
2839    if (vq->vring.desc && vq->handle_output) {
2840        VirtIODevice *vdev = vq->vdev;
2841
2842        if (unlikely(vdev->broken)) {
2843            return;
2844        }
2845
2846        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2847        vq->handle_output(vdev, vq);
2848
2849        if (unlikely(vdev->start_on_kick)) {
2850            virtio_set_started(vdev, true);
2851        }
2852    }
2853}
2854
2855void virtio_queue_notify(VirtIODevice *vdev, int n)
2856{
2857    VirtQueue *vq = &vdev->vq[n];
2858
2859    if (unlikely(!vq->vring.desc || vdev->broken)) {
2860        return;
2861    }
2862
2863    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2864    if (vq->host_notifier_enabled) {
2865        event_notifier_set(&vq->host_notifier);
2866    } else if (vq->handle_output) {
2867        vq->handle_output(vdev, vq);
2868
2869        if (unlikely(vdev->start_on_kick)) {
2870            virtio_set_started(vdev, true);
2871        }
2872    }
2873}
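
/*
 * [Editorial note, not part of the original file.]  When the transport has
 * enabled a host notifier (ioeventfd), a guest kick is forwarded by setting
 * the event notifier, so the queue handler runs in whatever context polls
 * that notifier (e.g. an IOThread); otherwise the handler is invoked
 * synchronously right here in the thread that performed the kick.
 */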
2874
2875uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2876{
2877    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2878        VIRTIO_NO_VECTOR;
2879}
2880
2881void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2882{
2883    VirtQueue *vq = &vdev->vq[n];
2884
2885    if (n < VIRTIO_QUEUE_MAX) {
2886        if (vdev->vector_queues &&
2887            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2888            QLIST_REMOVE(vq, node);
2889        }
2890        vdev->vq[n].vector = vector;
2891        if (vdev->vector_queues &&
2892            vector != VIRTIO_NO_VECTOR) {
2893            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2894        }
2895    }
2896}
2897
2898VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2899                            VirtIOHandleOutput handle_output)
2900{
2901    int i;
2902
2903    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2904        if (vdev->vq[i].vring.num == 0)
2905            break;
2906    }
2907
2908    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2909        abort();
2910
2911    vdev->vq[i].vring.num = queue_size;
2912    vdev->vq[i].vring.num_default = queue_size;
2913    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2914    vdev->vq[i].handle_output = handle_output;
2915    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2916
2917    return &vdev->vq[i];
2918}
2919
2920void virtio_delete_queue(VirtQueue *vq)
2921{
2922    vq->vring.num = 0;
2923    vq->vring.num_default = 0;
2924    vq->handle_output = NULL;
2925    g_free(vq->used_elems);
2926    vq->used_elems = NULL;
2927    virtio_virtqueue_reset_region_cache(vq);
2928}
2929
2930void virtio_del_queue(VirtIODevice *vdev, int n)
2931{
2932    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2933        abort();
2934    }
2935
2936    virtio_delete_queue(&vdev->vq[n]);
2937}
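
/*
 * [Editor's illustrative sketch, not part of the original file; kept
 *  compiled out.]  virtio_add_queue() and virtio_delete_queue() are normally
 * paired in a device's realize/unrealize paths.  The queue size, the handler
 * and the MyDeviceState field are assumptions made up for this example.
 */
#if 0 /* illustration only */
static void my_device_create_queues(VirtIODevice *vdev, MyDeviceState *s)
{
    /* A 256-entry queue whose kicks are handled by my_device_handle_output */
    s->req_vq = virtio_add_queue(vdev, 256, my_device_handle_output);
}

static void my_device_destroy_queues(MyDeviceState *s)
{
    virtio_delete_queue(s->req_vq);
    s->req_vq = NULL;
}
#endif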
2938
2939static void virtio_set_isr(VirtIODevice *vdev, int value)
2940{
2941    uint8_t old = qatomic_read(&vdev->isr);
2942
2943    /* Do not write ISR if it does not change, so that its cacheline remains
2944     * shared in the common case where the guest does not read it.
2945     */
2946    if ((old & value) != value) {
2947        qatomic_or(&vdev->isr, value);
2948    }
2949}
2950
2951/* Called within rcu_read_lock(). */
2952static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2953{
2954    uint16_t old, new;
2955    bool v;
2956    /* We need to expose used array entries before checking used event. */
2957    smp_mb();
2958    /* Always notify when queue is empty, if the feature was acknowledged. */
2959    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2960        !vq->inuse && virtio_queue_empty(vq)) {
2961        return true;
2962    }
2963
2964    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2965        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2966    }
2967
2968    v = vq->signalled_used_valid;
2969    vq->signalled_used_valid = true;
2970    old = vq->signalled_used;
2971    new = vq->signalled_used = vq->used_idx;
2972    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2973}
2974
2975static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2976                                    uint16_t off_wrap, uint16_t new,
2977                                    uint16_t old)
2978{
2979    int off = off_wrap & ~(1 << 15);
2980
2981    if (wrap != off_wrap >> 15) {
2982        off -= vq->vring.num;
2983    }
2984
2985    return vring_need_event(off, new, old);
2986}
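
/*
 * [Editorial note, not part of the original file.]  vring_need_event()
 * (from the standard virtio ring headers) implements the spec's event-index
 * test in modulo-2^16 arithmetic:
 *
 *     (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * i.e. "did the index pass event_idx while moving from old to new?".
 * For the packed layout, the helper above first strips the wrap bit
 * (bit 15) from off_wrap and, when the event was armed for the other wrap
 * of the ring, shifts it down by vring.num so the same comparison applies.
 */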
2987
2988/* Called within rcu_read_lock(). */
2989static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2990{
2991    VRingPackedDescEvent e;
2992    uint16_t old, new;
2993    bool v;
2994    VRingMemoryRegionCaches *caches;
2995
2996    caches = vring_get_region_caches(vq);
2997    if (!caches) {
2998        return false;
2999    }
3000
3001    vring_packed_event_read(vdev, &caches->avail, &e);
3002
3003    old = vq->signalled_used;
3004    new = vq->signalled_used = vq->used_idx;
3005    v = vq->signalled_used_valid;
3006    vq->signalled_used_valid = true;
3007
3008    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
3009        return false;
3010    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
3011        return true;
3012    }
3013
3014    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
3015                                         e.off_wrap, new, old);
3016}
3017
3018/* Called within rcu_read_lock().  */
3019static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
3020{
3021    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3022        return virtio_packed_should_notify(vdev, vq);
3023    } else {
3024        return virtio_split_should_notify(vdev, vq);
3025    }
3026}
3027
3028void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
3029{
3030    WITH_RCU_READ_LOCK_GUARD() {
3031        if (!virtio_should_notify(vdev, vq)) {
3032            return;
3033        }
3034    }
3035
3036    trace_virtio_notify_irqfd(vdev, vq);
3037
3038    /*
3039     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
3040     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
3041     * incorrectly polling this bit during crashdump and hibernation
3042     * in MSI mode, causing a hang if this bit is never updated.
3043     * Recent releases of Windows do not really shut down, but rather
3044     * log out and hibernate to make the next startup faster.  Hence,
3045     * this manifested as a more serious hang during shutdown with
3046     *
3047     * The next driver release, from 2016, fixed this problem, so working
3048     * around it is not a must, but it's easy to do, so let's do it here.
3049     *
3050     * Note: it's safe to update ISR from any thread as it was switched
3051     * to an atomic operation.
3052     */
3053    virtio_set_isr(vq->vdev, 0x1);
3054    event_notifier_set(&vq->guest_notifier);
3055}
3056
3057static void virtio_irq(VirtQueue *vq)
3058{
3059    virtio_set_isr(vq->vdev, 0x1);
3060    virtio_notify_vector(vq->vdev, vq->vector);
3061}
3062
3063void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
3064{
3065    WITH_RCU_READ_LOCK_GUARD() {
3066        if (!virtio_should_notify(vdev, vq)) {
3067            return;
3068        }
3069    }
3070
3071    trace_virtio_notify(vdev, vq);
3072    virtio_irq(vq);
3073}
3074
3075void virtio_notify_config(VirtIODevice *vdev)
3076{
3077    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
3078        return;
3079
3080    virtio_set_isr(vdev, 0x3);
3081    vdev->generation++;
3082    virtio_notify_vector(vdev, vdev->config_vector);
3083}
3084
3085static bool virtio_device_endian_needed(void *opaque)
3086{
3087    VirtIODevice *vdev = opaque;
3088
3089    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
3090    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3091        return vdev->device_endian != virtio_default_endian();
3092    }
3093    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
3094    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
3095}
3096
3097static bool virtio_64bit_features_needed(void *opaque)
3098{
3099    VirtIODevice *vdev = opaque;
3100
3101    return (vdev->host_features >> 32) != 0;
3102}
3103
3104static bool virtio_virtqueue_needed(void *opaque)
3105{
3106    VirtIODevice *vdev = opaque;
3107
3108    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
3109}
3110
3111static bool virtio_packed_virtqueue_needed(void *opaque)
3112{
3113    VirtIODevice *vdev = opaque;
3114
3115    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
3116}
3117
3118static bool virtio_ringsize_needed(void *opaque)
3119{
3120    VirtIODevice *vdev = opaque;
3121    int i;
3122
3123    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3124        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
3125            return true;
3126        }
3127    }
3128    return false;
3129}
3130
3131static bool virtio_extra_state_needed(void *opaque)
3132{
3133    VirtIODevice *vdev = opaque;
3134    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3135    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3136
3137    return k->has_extra_state &&
3138        k->has_extra_state(qbus->parent);
3139}
3140
3141static bool virtio_broken_needed(void *opaque)
3142{
3143    VirtIODevice *vdev = opaque;
3144
3145    return vdev->broken;
3146}
3147
3148static bool virtio_started_needed(void *opaque)
3149{
3150    VirtIODevice *vdev = opaque;
3151
3152    return vdev->started;
3153}
3154
3155static bool virtio_disabled_needed(void *opaque)
3156{
3157    VirtIODevice *vdev = opaque;
3158
3159    return vdev->disabled;
3160}
3161
3162static const VMStateDescription vmstate_virtqueue = {
3163    .name = "virtqueue_state",
3164    .version_id = 1,
3165    .minimum_version_id = 1,
3166    .fields = (VMStateField[]) {
3167        VMSTATE_UINT64(vring.avail, struct VirtQueue),
3168        VMSTATE_UINT64(vring.used, struct VirtQueue),
3169        VMSTATE_END_OF_LIST()
3170    }
3171};
3172
3173static const VMStateDescription vmstate_packed_virtqueue = {
3174    .name = "packed_virtqueue_state",
3175    .version_id = 1,
3176    .minimum_version_id = 1,
3177    .fields = (VMStateField[]) {
3178        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
3179        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
3180        VMSTATE_UINT16(used_idx, struct VirtQueue),
3181        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
3182        VMSTATE_UINT32(inuse, struct VirtQueue),
3183        VMSTATE_END_OF_LIST()
3184    }
3185};
3186
3187static const VMStateDescription vmstate_virtio_virtqueues = {
3188    .name = "virtio/virtqueues",
3189    .version_id = 1,
3190    .minimum_version_id = 1,
3191    .needed = &virtio_virtqueue_needed,
3192    .fields = (VMStateField[]) {
3193        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3194                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
3195        VMSTATE_END_OF_LIST()
3196    }
3197};
3198
3199static const VMStateDescription vmstate_virtio_packed_virtqueues = {
3200    .name = "virtio/packed_virtqueues",
3201    .version_id = 1,
3202    .minimum_version_id = 1,
3203    .needed = &virtio_packed_virtqueue_needed,
3204    .fields = (VMStateField[]) {
3205        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3206                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
3207        VMSTATE_END_OF_LIST()
3208    }
3209};
3210
3211static const VMStateDescription vmstate_ringsize = {
3212    .name = "ringsize_state",
3213    .version_id = 1,
3214    .minimum_version_id = 1,
3215    .fields = (VMStateField[]) {
3216        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
3217        VMSTATE_END_OF_LIST()
3218    }
3219};
3220
3221static const VMStateDescription vmstate_virtio_ringsize = {
3222    .name = "virtio/ringsize",
3223    .version_id = 1,
3224    .minimum_version_id = 1,
3225    .needed = &virtio_ringsize_needed,
3226    .fields = (VMStateField[]) {
3227        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3228                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
3229        VMSTATE_END_OF_LIST()
3230    }
3231};
3232
3233static int get_extra_state(QEMUFile *f, void *pv, size_t size,
3234                           const VMStateField *field)
3235{
3236    VirtIODevice *vdev = pv;
3237    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3238    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3239
3240    if (!k->load_extra_state) {
3241        return -1;
3242    } else {
3243        return k->load_extra_state(qbus->parent, f);
3244    }
3245}
3246
3247static int put_extra_state(QEMUFile *f, void *pv, size_t size,
3248                           const VMStateField *field, JSONWriter *vmdesc)
3249{
3250    VirtIODevice *vdev = pv;
3251    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3252    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3253
3254    k->save_extra_state(qbus->parent, f);
3255    return 0;
3256}
3257
3258static const VMStateInfo vmstate_info_extra_state = {
3259    .name = "virtqueue_extra_state",
3260    .get = get_extra_state,
3261    .put = put_extra_state,
3262};
3263
3264static const VMStateDescription vmstate_virtio_extra_state = {
3265    .name = "virtio/extra_state",
3266    .version_id = 1,
3267    .minimum_version_id = 1,
3268    .needed = &virtio_extra_state_needed,
3269    .fields = (VMStateField[]) {
3270        {
3271            .name         = "extra_state",
3272            .version_id   = 0,
3273            .field_exists = NULL,
3274            .size         = 0,
3275            .info         = &vmstate_info_extra_state,
3276            .flags        = VMS_SINGLE,
3277            .offset       = 0,
3278        },
3279        VMSTATE_END_OF_LIST()
3280    }
3281};
3282
3283static const VMStateDescription vmstate_virtio_device_endian = {
3284    .name = "virtio/device_endian",
3285    .version_id = 1,
3286    .minimum_version_id = 1,
3287    .needed = &virtio_device_endian_needed,
3288    .fields = (VMStateField[]) {
3289        VMSTATE_UINT8(device_endian, VirtIODevice),
3290        VMSTATE_END_OF_LIST()
3291    }
3292};
3293
3294static const VMStateDescription vmstate_virtio_64bit_features = {
3295    .name = "virtio/64bit_features",
3296    .version_id = 1,
3297    .minimum_version_id = 1,
3298    .needed = &virtio_64bit_features_needed,
3299    .fields = (VMStateField[]) {
3300        VMSTATE_UINT64(guest_features, VirtIODevice),
3301        VMSTATE_END_OF_LIST()
3302    }
3303};
3304
3305static const VMStateDescription vmstate_virtio_broken = {
3306    .name = "virtio/broken",
3307    .version_id = 1,
3308    .minimum_version_id = 1,
3309    .needed = &virtio_broken_needed,
3310    .fields = (VMStateField[]) {
3311        VMSTATE_BOOL(broken, VirtIODevice),
3312        VMSTATE_END_OF_LIST()
3313    }
3314};
3315
3316static const VMStateDescription vmstate_virtio_started = {
3317    .name = "virtio/started",
3318    .version_id = 1,
3319    .minimum_version_id = 1,
3320    .needed = &virtio_started_needed,
3321    .fields = (VMStateField[]) {
3322        VMSTATE_BOOL(started, VirtIODevice),
3323        VMSTATE_END_OF_LIST()
3324    }
3325};
3326
3327static const VMStateDescription vmstate_virtio_disabled = {
3328    .name = "virtio/disabled",
3329    .version_id = 1,
3330    .minimum_version_id = 1,
3331    .needed = &virtio_disabled_needed,
3332    .fields = (VMStateField[]) {
3333        VMSTATE_BOOL(disabled, VirtIODevice),
3334        VMSTATE_END_OF_LIST()
3335    }
3336};
3337
3338static const VMStateDescription vmstate_virtio = {
3339    .name = "virtio",
3340    .version_id = 1,
3341    .minimum_version_id = 1,
3342    .fields = (VMStateField[]) {
3343        VMSTATE_END_OF_LIST()
3344    },
3345    .subsections = (const VMStateDescription*[]) {
3346        &vmstate_virtio_device_endian,
3347        &vmstate_virtio_64bit_features,
3348        &vmstate_virtio_virtqueues,
3349        &vmstate_virtio_ringsize,
3350        &vmstate_virtio_broken,
3351        &vmstate_virtio_extra_state,
3352        &vmstate_virtio_started,
3353        &vmstate_virtio_packed_virtqueues,
3354        &vmstate_virtio_disabled,
3355        NULL
3356    }
3357};
3358
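/*
 * Save the common virtio device state: transport config (save_config),
 * status/isr/queue_sel, the low 32 feature bits (the high bits travel in
 * the "virtio/64bit_features" subsection), the device config space, the
 * number of in-use virtqueues, and per-queue ring size/alignment, desc
 * address and last_avail_idx, followed by device-specific state and the
 * vmstate_virtio subsections.
 */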
3359int virtio_save(VirtIODevice *vdev, QEMUFile *f)
3360{
3361    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3362    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3363    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3364    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
3365    int i;
3366
3367    if (k->save_config) {
3368        k->save_config(qbus->parent, f);
3369    }
3370
3371    qemu_put_8s(f, &vdev->status);
3372    qemu_put_8s(f, &vdev->isr);
3373    qemu_put_be16s(f, &vdev->queue_sel);
3374    qemu_put_be32s(f, &guest_features_lo);
3375    qemu_put_be32(f, vdev->config_len);
3376    qemu_put_buffer(f, vdev->config, vdev->config_len);
3377
3378    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3379        if (vdev->vq[i].vring.num == 0)
3380            break;
3381    }
3382
3383    qemu_put_be32(f, i);
3384
3385    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3386        if (vdev->vq[i].vring.num == 0)
3387            break;
3388
3389        qemu_put_be32(f, vdev->vq[i].vring.num);
3390        if (k->has_variable_vring_alignment) {
3391            qemu_put_be32(f, vdev->vq[i].vring.align);
3392        }
3393        /*
3394         * Save desc now; the rest of the ring addresses are saved in
3395         * subsections for VIRTIO-1 devices.
3396         */
3397        qemu_put_be64(f, vdev->vq[i].vring.desc);
3398        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
3399        if (k->save_queue) {
3400            k->save_queue(qbus->parent, i, f);
3401        }
3402    }
3403
3404    if (vdc->save != NULL) {
3405        vdc->save(vdev, f);
3406    }
3407
3408    if (vdc->vmsd) {
3409        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
3410        if (ret) {
3411            return ret;
3412        }
3413    }
3414
3415    /* Subsections */
3416    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
3417}
3418
3419/* A wrapper for use as a VMState .put function */
3420static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
3421                              const VMStateField *field, JSONWriter *vmdesc)
3422{
3423    return virtio_save(VIRTIO_DEVICE(opaque), f);
3424}
3425
3426/* A wrapper for use as a VMState .get function */
3427static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
3428                             const VMStateField *field)
3429{
3430    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
3431    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
3432
3433    return virtio_load(vdev, f, dc->vmsd->version_id);
3434}
3435
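/*
 * VMStateInfo wrapper that lets a transport embed the whole virtio device
 * state in its own VMStateDescription (used by the VMSTATE_VIRTIO_DEVICE
 * macro); it simply routes .get/.put to virtio_load()/virtio_save().
 */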
3436const VMStateInfo  virtio_vmstate_info = {
3437    .name = "virtio",
3438    .get = virtio_device_get,
3439    .put = virtio_device_put,
3440};
3441
3442static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
3443{
3444    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3445    bool bad = (val & ~(vdev->host_features)) != 0;
3446
3447    val &= vdev->host_features;
3448    if (k->set_features) {
3449        k->set_features(vdev, val);
3450    }
3451    vdev->guest_features = val;
3452    return bad ? -1 : 0;
3453}
3454
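/*
 * Entry point used by transports when the guest writes its feature
 * selection. Writes after FEATURES_OK are rejected, and the vring region
 * caches are rebuilt when VIRTIO_RING_F_EVENT_IDX is negotiated, since
 * that feature changes their size.
 */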
3455int virtio_set_features(VirtIODevice *vdev, uint64_t val)
3456{
3457    int ret;
3458    /*
3459     * The driver must not attempt to set features after feature negotiation
3460     * has finished.
3461     */
3462    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
3463        return -EINVAL;
3464    }
3465
3466    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
3467        qemu_log_mask(LOG_GUEST_ERROR,
3468                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
3469                      __func__, vdev->name);
3470    }
3471
3472    ret = virtio_set_features_nocheck(vdev, val);
3473    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
3474        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
3475        int i;
3476        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3477            if (vdev->vq[i].vring.num != 0) {
3478                virtio_init_region_cache(vdev, i);
3479            }
3480        }
3481    }
3482    if (!ret) {
3483        if (!virtio_device_started(vdev, vdev->status) &&
3484            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3485            vdev->start_on_kick = true;
3486        }
3487    }
3488    return ret;
3489}
3490
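/*
 * Return the config space size implied by the offered features: start from
 * min_size and grow to cover every field whose guarding feature bit is set
 * in host_features. Illustrative sketch of how a device might describe its
 * config layout (the names below mirror the virtio-net style but are only
 * an example):
 *
 *     static const VirtIOFeature feature_sizes[] = {
 *         {.flags = 1ULL << VIRTIO_NET_F_MQ,
 *          .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 *         {}
 *     };
 *     static const VirtIOConfigSizeParams cfg_size_params = {
 *         .min_size = endof(struct virtio_net_config, status),
 *         .max_size = sizeof(struct virtio_net_config),
 *         .feature_sizes = feature_sizes,
 *     };
 *     ...
 *     vdev->config_len = virtio_get_config_size(&cfg_size_params,
 *                                               vdev->host_features);
 */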
3491size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
3492                              uint64_t host_features)
3493{
3494    size_t config_size = params->min_size;
3495    const VirtIOFeature *feature_sizes = params->feature_sizes;
3496    size_t i;
3497
3498    for (i = 0; feature_sizes[i].flags != 0; i++) {
3499        if (host_features & feature_sizes[i].flags) {
3500            config_size = MAX(feature_sizes[i].end, config_size);
3501        }
3502    }
3503
3504    assert(config_size <= params->max_size);
3505    return config_size;
3506}
3507
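/*
 * Counterpart of virtio_save(): device_endian is poisoned until the
 * subsections are loaded, the fields are restored in the same order they
 * were saved, and the result is sanity-checked (feature bits against
 * host_features, ring indices against the ring contents) before the region
 * caches and, for legacy devices, the avail/used addresses derived from the
 * desc address are rebuilt.
 */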
3508int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3509{
3510    int i, ret;
3511    int32_t config_len;
3512    uint32_t num;
3513    uint32_t features;
3514    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3515    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3516    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3517
3518    /*
3519     * We poison the endianness to ensure it does not get used before
3520     * subsections have been loaded.
3521     */
3522    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3523
3524    if (k->load_config) {
3525        ret = k->load_config(qbus->parent, f);
3526        if (ret)
3527            return ret;
3528    }
3529
3530    qemu_get_8s(f, &vdev->status);
3531    qemu_get_8s(f, &vdev->isr);
3532    qemu_get_be16s(f, &vdev->queue_sel);
3533    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3534        return -1;
3535    }
3536    qemu_get_be32s(f, &features);
3537
3538    /*
3539     * Temporarily set guest_features low bits - needed by
3540     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3541     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3542     *
3543     * Note: devices should always test host features in future - don't create
3544     * new dependencies like this.
3545     */
3546    vdev->guest_features = features;
3547
3548    config_len = qemu_get_be32(f);
3549
3550    /*
3551     * There are cases where the incoming config can be bigger or smaller
3552     * than what we have; so load what we have space for, and skip
3553     * any excess that's in the stream.
3554     */
3555    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3556
3557    while (config_len > vdev->config_len) {
3558        qemu_get_byte(f);
3559        config_len--;
3560    }
3561
3562    num = qemu_get_be32(f);
3563
3564    if (num > VIRTIO_QUEUE_MAX) {
3565        error_report("Invalid number of virtqueues: 0x%x", num);
3566        return -1;
3567    }
3568
3569    for (i = 0; i < num; i++) {
3570        vdev->vq[i].vring.num = qemu_get_be32(f);
3571        if (k->has_variable_vring_alignment) {
3572            vdev->vq[i].vring.align = qemu_get_be32(f);
3573        }
3574        vdev->vq[i].vring.desc = qemu_get_be64(f);
3575        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3576        vdev->vq[i].signalled_used_valid = false;
3577        vdev->vq[i].notification = true;
3578
3579        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3580            error_report("VQ %d address 0x0 "
3581                         "inconsistent with Host index 0x%x",
3582                         i, vdev->vq[i].last_avail_idx);
3583            return -1;
3584        }
3585        if (k->load_queue) {
3586            ret = k->load_queue(qbus->parent, i, f);
3587            if (ret)
3588                return ret;
3589        }
3590    }
3591
3592    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3593
3594    if (vdc->load != NULL) {
3595        ret = vdc->load(vdev, f, version_id);
3596        if (ret) {
3597            return ret;
3598        }
3599    }
3600
3601    if (vdc->vmsd) {
3602        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3603        if (ret) {
3604            return ret;
3605        }
3606    }
3607
3608    /* Subsections */
3609    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3610    if (ret) {
3611        return ret;
3612    }
3613
3614    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3615        vdev->device_endian = virtio_default_endian();
3616    }
3617
3618    if (virtio_64bit_features_needed(vdev)) {
3619        /*
3620         * Subsection load filled vdev->guest_features.  Run them
3621         * through virtio_set_features to sanity-check them against
3622         * host_features.
3623         */
3624        uint64_t features64 = vdev->guest_features;
3625        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3626            error_report("Features 0x%" PRIx64 " unsupported. "
3627                         "Allowed features: 0x%" PRIx64,
3628                         features64, vdev->host_features);
3629            return -1;
3630        }
3631    } else {
3632        if (virtio_set_features_nocheck(vdev, features) < 0) {
3633            error_report("Features 0x%x unsupported. "
3634                         "Allowed features: 0x%" PRIx64,
3635                         features, vdev->host_features);
3636            return -1;
3637        }
3638    }
3639
3640    if (!virtio_device_started(vdev, vdev->status) &&
3641        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3642        vdev->start_on_kick = true;
3643    }
3644
3645    RCU_READ_LOCK_GUARD();
3646    for (i = 0; i < num; i++) {
3647        if (vdev->vq[i].vring.desc) {
3648            uint16_t nheads;
3649
3650            /*
3651             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3652             * only the region cache needs to be set up.  Legacy devices need
3653             * to calculate used and avail ring addresses based on the desc
3654             * address.
3655             */
3656            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3657                virtio_init_region_cache(vdev, i);
3658            } else {
3659                virtio_queue_update_rings(vdev, i);
3660            }
3661
3662            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3663                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3664                vdev->vq[i].shadow_avail_wrap_counter =
3665                                        vdev->vq[i].last_avail_wrap_counter;
3666                continue;
3667            }
3668
3669            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3670            /* Check it isn't doing strange things with descriptor numbers. */
3671            if (nheads > vdev->vq[i].vring.num) {
3672                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3673                             "inconsistent with Host index 0x%x: delta 0x%x",
3674                             i, vdev->vq[i].vring.num,
3675                             vring_avail_idx(&vdev->vq[i]),
3676                             vdev->vq[i].last_avail_idx, nheads);
3677                vdev->vq[i].used_idx = 0;
3678                vdev->vq[i].shadow_avail_idx = 0;
3679                vdev->vq[i].inuse = 0;
3680                continue;
3681            }
3682            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3683            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3684
3685            /*
3686             * Some devices migrate VirtQueueElements that have been popped
3687             * from the avail ring but not yet returned to the used ring.
3688             * Since max ring size < UINT16_MAX it's safe to use modulo
3689             * UINT16_MAX + 1 subtraction.
3690             */
3691            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3692                                vdev->vq[i].used_idx);
3693            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3694                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3695                             "used_idx 0x%x",
3696                             i, vdev->vq[i].vring.num,
3697                             vdev->vq[i].last_avail_idx,
3698                             vdev->vq[i].used_idx);
3699                return -1;
3700            }
3701        }
3702    }
3703
3704    if (vdc->post_load) {
3705        ret = vdc->post_load(vdev);
3706        if (ret) {
3707            return ret;
3708        }
3709    }
3710
3711    return 0;
3712}
3713
3714void virtio_cleanup(VirtIODevice *vdev)
3715{
3716    qemu_del_vm_change_state_handler(vdev->vmstate);
3717}
3718
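/*
 * VM run-state hook: when the VM (re)starts, propagate the status to the
 * device before notifying the transport; when it stops, notify the
 * transport first, so the transport/backend always sees a consistent
 * device state.
 */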
3719static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3720{
3721    VirtIODevice *vdev = opaque;
3722    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3723    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3724    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3725    vdev->vm_running = running;
3726
3727    if (backend_run) {
3728        virtio_set_status(vdev, vdev->status);
3729    }
3730
3731    if (k->vmstate_change) {
3732        k->vmstate_change(qbus->parent, backend_run);
3733    }
3734
3735    if (!backend_run) {
3736        virtio_set_status(vdev, vdev->status);
3737    }
3738}
3739
3740void virtio_instance_init_common(Object *proxy_obj, void *data,
3741                                 size_t vdev_size, const char *vdev_name)
3742{
3743    DeviceState *vdev = data;
3744
3745    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3746                                       vdev_size, vdev_name, &error_abort,
3747                                       NULL);
3748    qdev_alias_all_properties(vdev, proxy_obj);
3749}
3750
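/*
 * Common initialisation called from a device's realize path. A typical
 * (purely illustrative) sequence in a device model looks roughly like:
 *
 *     virtio_init(vdev, VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config));
 *     vq = virtio_add_queue(vdev, 128, handle_output);
 *
 * where handle_output is the device's virtqueue handler; the queue sizes
 * and config length are of course device-specific.
 */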
3751void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3752{
3753    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3754    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3755    int i;
3756    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3757
3758    if (nvectors) {
3759        vdev->vector_queues =
3760            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3761    }
3762
3763    vdev->start_on_kick = false;
3764    vdev->started = false;
3765    vdev->vhost_started = false;
3766    vdev->device_id = device_id;
3767    vdev->status = 0;
3768    qatomic_set(&vdev->isr, 0);
3769    vdev->queue_sel = 0;
3770    vdev->config_vector = VIRTIO_NO_VECTOR;
3771    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3772    vdev->vm_running = runstate_is_running();
3773    vdev->broken = false;
3774    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3775        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3776        vdev->vq[i].vdev = vdev;
3777        vdev->vq[i].queue_index = i;
3778        vdev->vq[i].host_notifier_enabled = false;
3779    }
3780
3781    vdev->name = virtio_id_to_name(device_id);
3782    vdev->config_len = config_size;
3783    if (vdev->config_len) {
3784        vdev->config = g_malloc0(config_size);
3785    } else {
3786        vdev->config = NULL;
3787    }
3788    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3789            virtio_vmstate_change, vdev);
3790    vdev->device_endian = virtio_default_endian();
3791    vdev->use_guest_notifier_mask = true;
3792}
3793
3794/*
3795 * Only devices that have already been around prior to defining the virtio
3796 * standard support legacy mode; this includes devices not specified in the
3797 * standard. All newer devices conform to the virtio standard only.
3798 */
3799bool virtio_legacy_allowed(VirtIODevice *vdev)
3800{
3801    switch (vdev->device_id) {
3802    case VIRTIO_ID_NET:
3803    case VIRTIO_ID_BLOCK:
3804    case VIRTIO_ID_CONSOLE:
3805    case VIRTIO_ID_RNG:
3806    case VIRTIO_ID_BALLOON:
3807    case VIRTIO_ID_RPMSG:
3808    case VIRTIO_ID_SCSI:
3809    case VIRTIO_ID_9P:
3810    case VIRTIO_ID_RPROC_SERIAL:
3811    case VIRTIO_ID_CAIF:
3812        return true;
3813    default:
3814        return false;
3815    }
3816}
3817
3818bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3819{
3820    return vdev->disable_legacy_check;
3821}
3822
3823hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3824{
3825    return vdev->vq[n].vring.desc;
3826}
3827
3828bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3829{
3830    return virtio_queue_get_desc_addr(vdev, n) != 0;
3831}
3832
3833bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3834{
3835    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3836    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3837
3838    if (k->queue_enabled) {
3839        return k->queue_enabled(qbus->parent, n);
3840    }
3841    return virtio_queue_enabled_legacy(vdev, n);
3842}
3843
3844hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3845{
3846    return vdev->vq[n].vring.avail;
3847}
3848
3849hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3850{
3851    return vdev->vq[n].vring.used;
3852}
3853
3854hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3855{
3856    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3857}
3858
3859hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3860{
3861    int s;
3862
3863    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3864        return sizeof(struct VRingPackedDescEvent);
3865    }
3866
3867    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3868    return offsetof(VRingAvail, ring) +
3869        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3870}
3871
3872hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3873{
3874    int s;
3875
3876    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3877        return sizeof(struct VRingPackedDescEvent);
3878    }
3879
3880    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3881    return offsetof(VRingUsed, ring) +
3882        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3883}
3884
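/*
 * For packed rings, the "last avail" state exchanged with transports/vhost
 * packs four values into one 32-bit word: bits 0-14 last_avail_idx, bit 15
 * the avail wrap counter, bits 16-30 used_idx, bit 31 the used wrap counter.
 */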
3885static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3886                                                           int n)
3887{
3888    unsigned int avail, used;
3889
3890    avail = vdev->vq[n].last_avail_idx;
3891    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3892
3893    used = vdev->vq[n].used_idx;
3894    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3895
3896    return avail | used << 16;
3897}
3898
3899static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3900                                                      int n)
3901{
3902    return vdev->vq[n].last_avail_idx;
3903}
3904
3905unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3906{
3907    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3908        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3909    } else {
3910        return virtio_queue_split_get_last_avail_idx(vdev, n);
3911    }
3912}
3913
3914static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3915                                                   int n, unsigned int idx)
3916{
3917    struct VirtQueue *vq = &vdev->vq[n];
3918
3919    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3920    vq->last_avail_wrap_counter =
3921        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3922    idx >>= 16;
3923    vq->used_idx = idx & 0x7fff;
3924    vq->used_wrap_counter = !!(idx & 0x8000);
3925}
3926
3927static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3928                                                  int n, unsigned int idx)
3929{
3930    vdev->vq[n].last_avail_idx = idx;
3931    vdev->vq[n].shadow_avail_idx = idx;
3932}
3933
3934void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3935                                     unsigned int idx)
3936{
3937    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3938        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3939    } else {
3940        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3941    }
3942}
3943
3944static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3945                                                       int n)
3946{
3947    /* No in-memory index to restore from (the split ring uses its used idx) */
3948    return;
3949}
3950
3951static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3952                                                      int n)
3953{
3954    RCU_READ_LOCK_GUARD();
3955    if (vdev->vq[n].vring.desc) {
3956        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3957        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3958    }
3959}
3960
3961void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3962{
3963    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3964        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3965    } else {
3966        virtio_queue_split_restore_last_avail_idx(vdev, n);
3967    }
3968}
3969
3970static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3971{
3972    /* used idx was updated through set_last_avail_idx() */
3973    return;
3974}
3975
3976static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3977{
3978    RCU_READ_LOCK_GUARD();
3979    if (vdev->vq[n].vring.desc) {
3980        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3981    }
3982}
3983
3984void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3985{
3986    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3987        virtio_queue_packed_update_used_idx(vdev, n);
3988    } else {
3989        virtio_queue_split_update_used_idx(vdev, n);
3990    }
3991}
3992
3993void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3994{
3995    vdev->vq[n].signalled_used_valid = false;
3996}
3997
3998VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3999{
4000    return vdev->vq + n;
4001}
4002
4003uint16_t virtio_get_queue_index(VirtQueue *vq)
4004{
4005    return vq->queue_index;
4006}
4007
4008static void virtio_queue_guest_notifier_read(EventNotifier *n)
4009{
4010    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
4011    if (event_notifier_test_and_clear(n)) {
4012        virtio_irq(vq);
4013    }
4014}
4015
4016void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
4017                                                bool with_irqfd)
4018{
4019    if (assign && !with_irqfd) {
4020        event_notifier_set_handler(&vq->guest_notifier,
4021                                   virtio_queue_guest_notifier_read);
4022    } else {
4023        event_notifier_set_handler(&vq->guest_notifier, NULL);
4024    }
4025    if (!assign) {
4026        /* Test and clear notifier before closing it,
4027         * in case poll callback didn't have time to run. */
4028        virtio_queue_guest_notifier_read(&vq->guest_notifier);
4029    }
4030}
4031
4032EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
4033{
4034    return &vq->guest_notifier;
4035}
4036
4037static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
4038{
4039    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4040
4041    virtio_queue_set_notification(vq, 0);
4042}
4043
4044static bool virtio_queue_host_notifier_aio_poll(void *opaque)
4045{
4046    EventNotifier *n = opaque;
4047    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4048
4049    return vq->vring.desc && !virtio_queue_empty(vq);
4050}
4051
4052static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
4053{
4054    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4055
4056    virtio_queue_notify_vq(vq);
4057}
4058
4059static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
4060{
4061    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4062
4063    /* Caller polls once more after this to catch requests that race with us */
4064    virtio_queue_set_notification(vq, 1);
4065}
4066
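/*
 * Attach the host notifier to an AioContext with adaptive polling: while
 * the AioContext polls, guest->host notifications are suppressed
 * (poll_begin/poll_end toggle virtio_queue_set_notification) and the poll
 * callback checks the ring directly instead.
 */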
4067void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
4068{
4069    aio_set_event_notifier(ctx, &vq->host_notifier, true,
4070                           virtio_queue_host_notifier_read,
4071                           virtio_queue_host_notifier_aio_poll,
4072                           virtio_queue_host_notifier_aio_poll_ready);
4073    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
4074                                virtio_queue_host_notifier_aio_poll_begin,
4075                                virtio_queue_host_notifier_aio_poll_end);
4076}
4077
4078/*
4079 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
4080 * this for rx virtqueues and similar cases where the virtqueue handler
4081 * function does not pop all elements. When the virtqueue is left non-empty,
4082 * polling consumes CPU cycles and should not be used.
4083 */
4084void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
4085{
4086    aio_set_event_notifier(ctx, &vq->host_notifier, true,
4087                           virtio_queue_host_notifier_read,
4088                           NULL, NULL);
4089}
4090
4091void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
4092{
4093    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
4094    /* Test and clear notifier after disabling the event,
4095     * in case poll callback didn't have time to run. */
4096    virtio_queue_host_notifier_read(&vq->host_notifier);
4097}
4098
4099void virtio_queue_host_notifier_read(EventNotifier *n)
4100{
4101    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4102    if (event_notifier_test_and_clear(n)) {
4103        virtio_queue_notify_vq(vq);
4104    }
4105}
4106
4107EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
4108{
4109    return &vq->host_notifier;
4110}
4111
4112void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
4113{
4114    vq->host_notifier_enabled = enabled;
4115}
4116
4117int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
4118                                      MemoryRegion *mr, bool assign)
4119{
4120    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4121    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
4122
4123    if (k->set_host_notifier_mr) {
4124        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
4125    }
4126
4127    return -1;
4128}
4129
4130void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
4131{
4132    g_free(vdev->bus_name);
4133    vdev->bus_name = g_strdup(bus_name);
4134}
4135
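/*
 * Report a fatal device error: log it, mark the device broken so further
 * processing stops, and, for VIRTIO 1.0 devices, request a reset via
 * VIRTIO_CONFIG_S_NEEDS_RESET plus a config interrupt.
 */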
4136void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
4137{
4138    va_list ap;
4139
4140    va_start(ap, fmt);
4141    error_vreport(fmt, ap);
4142    va_end(ap);
4143
4144    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
4145        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
4146        virtio_notify_config(vdev);
4147    }
4148
4149    vdev->broken = true;
4150}
4151
4152static void virtio_memory_listener_commit(MemoryListener *listener)
4153{
4154    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
4155    int i;
4156
4157    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4158        if (vdev->vq[i].vring.num == 0) {
4159            break;
4160        }
4161        virtio_init_region_cache(vdev, i);
4162    }
4163}
4164
4165static void virtio_device_realize(DeviceState *dev, Error **errp)
4166{
4167    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4168    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4169    Error *err = NULL;
4170
4171    /* Devices should either use vmsd or the load/save methods */
4172    assert(!vdc->vmsd || !vdc->load);
4173
4174    if (vdc->realize != NULL) {
4175        vdc->realize(dev, &err);
4176        if (err != NULL) {
4177            error_propagate(errp, err);
4178            return;
4179        }
4180    }
4181
4182    virtio_bus_device_plugged(vdev, &err);
4183    if (err != NULL) {
4184        error_propagate(errp, err);
4185        vdc->unrealize(dev);
4186        return;
4187    }
4188
4189    vdev->listener.commit = virtio_memory_listener_commit;
4190    vdev->listener.name = "virtio";
4191    memory_listener_register(&vdev->listener, vdev->dma_as);
4192    QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
4193}
4194
4195static void virtio_device_unrealize(DeviceState *dev)
4196{
4197    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4198    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4199
4200    memory_listener_unregister(&vdev->listener);
4201    virtio_bus_device_unplugged(vdev);
4202
4203    if (vdc->unrealize != NULL) {
4204        vdc->unrealize(dev);
4205    }
4206
4207    QTAILQ_REMOVE(&virtio_list, vdev, next);
4208    g_free(vdev->bus_name);
4209    vdev->bus_name = NULL;
4210}
4211
4212static void virtio_device_free_virtqueues(VirtIODevice *vdev)
4213{
4214    int i;
4215    if (!vdev->vq) {
4216        return;
4217    }
4218
4219    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4220        if (vdev->vq[i].vring.num == 0) {
4221            break;
4222        }
4223        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
4224    }
4225    g_free(vdev->vq);
4226}
4227
4228static void virtio_device_instance_finalize(Object *obj)
4229{
4230    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
4231
4232    virtio_device_free_virtqueues(vdev);
4233
4234    g_free(vdev->config);
4235    g_free(vdev->vector_queues);
4236}
4237
4238static Property virtio_properties[] = {
4239    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
4240    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
4241    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
4242    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
4243                     disable_legacy_check, false),
4244    DEFINE_PROP_END_OF_LIST(),
4245};
4246
4247static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
4248{
4249    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4250    int i, n, r, err;
4251
4252    /*
4253     * Batch all the host notifiers in a single transaction to avoid
4254     * quadratic time complexity in address_space_update_ioeventfds().
4255     */
4256    memory_region_transaction_begin();
4257    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4258        VirtQueue *vq = &vdev->vq[n];
4259        if (!virtio_queue_get_num(vdev, n)) {
4260            continue;
4261        }
4262        r = virtio_bus_set_host_notifier(qbus, n, true);
4263        if (r < 0) {
4264            err = r;
4265            goto assign_error;
4266        }
4267        event_notifier_set_handler(&vq->host_notifier,
4268                                   virtio_queue_host_notifier_read);
4269    }
4270
4271    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4272        /* Kick right away to begin processing requests already in vring */
4273        VirtQueue *vq = &vdev->vq[n];
4274        if (!vq->vring.num) {
4275            continue;
4276        }
4277        event_notifier_set(&vq->host_notifier);
4278    }
4279    memory_region_transaction_commit();
4280    return 0;
4281
4282assign_error:
4283    i = n; /* save n for a second iteration after transaction is committed. */
4284    while (--n >= 0) {
4285        VirtQueue *vq = &vdev->vq[n];
4286        if (!virtio_queue_get_num(vdev, n)) {
4287            continue;
4288        }
4289
4290        event_notifier_set_handler(&vq->host_notifier, NULL);
4291        r = virtio_bus_set_host_notifier(qbus, n, false);
4292        assert(r >= 0);
4293    }
4294    /*
4295     * The transaction expects the ioeventfds to be open when it
4296     * commits. Do it now, before the cleanup loop.
4297     */
4298    memory_region_transaction_commit();
4299
4300    while (--i >= 0) {
4301        if (!virtio_queue_get_num(vdev, i)) {
4302            continue;
4303        }
4304        virtio_bus_cleanup_host_notifier(qbus, i);
4305    }
4306    return err;
4307}
4308
4309int virtio_device_start_ioeventfd(VirtIODevice *vdev)
4310{
4311    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4312    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4313
4314    return virtio_bus_start_ioeventfd(vbus);
4315}
4316
4317static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
4318{
4319    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4320    int n, r;
4321
4322    /*
4323     * Batch all the host notifiers in a single transaction to avoid
4324     * quadratic time complexity in address_space_update_ioeventfds().
4325     */
4326    memory_region_transaction_begin();
4327    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4328        VirtQueue *vq = &vdev->vq[n];
4329
4330        if (!virtio_queue_get_num(vdev, n)) {
4331            continue;
4332        }
4333        event_notifier_set_handler(&vq->host_notifier, NULL);
4334        r = virtio_bus_set_host_notifier(qbus, n, false);
4335        assert(r >= 0);
4336    }
4337    /*
4338     * The transaction expects the ioeventfds to be open when it
4339     * commits. Do it now, before the cleanup loop.
4340     */
4341    memory_region_transaction_commit();
4342
4343    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4344        if (!virtio_queue_get_num(vdev, n)) {
4345            continue;
4346        }
4347        virtio_bus_cleanup_host_notifier(qbus, n);
4348    }
4349}
4350
4351int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
4352{
4353    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4354    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4355
4356    return virtio_bus_grab_ioeventfd(vbus);
4357}
4358
4359void virtio_device_release_ioeventfd(VirtIODevice *vdev)
4360{
4361    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4362    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4363
4364    virtio_bus_release_ioeventfd(vbus);
4365}
4366
4367static void virtio_device_class_init(ObjectClass *klass, void *data)
4368{
4369    /* Set class-wide defaults for all virtio devices here. */
4370    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4371    DeviceClass *dc = DEVICE_CLASS(klass);
4372
4373    dc->realize = virtio_device_realize;
4374    dc->unrealize = virtio_device_unrealize;
4375    dc->bus_type = TYPE_VIRTIO_BUS;
4376    device_class_set_props(dc, virtio_properties);
4377    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
4378    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
4379
4380    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
4381
4382    QTAILQ_INIT(&virtio_list);
4383}
4384
4385bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
4386{
4387    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4388    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4389
4390    return virtio_bus_ioeventfd_enabled(vbus);
4391}
4392
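/*
 * x-query-virtio: list the QOM path and name of every realized virtio
 * device, pruning stale (no longer realized) entries from virtio_list
 * along the way.
 */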
4393VirtioInfoList *qmp_x_query_virtio(Error **errp)
4394{
4395    VirtioInfoList *list = NULL;
4396    VirtioInfoList *node;
4397    VirtIODevice *vdev;
4398
4399    QTAILQ_FOREACH(vdev, &virtio_list, next) {
4400        DeviceState *dev = DEVICE(vdev);
4401        Error *err = NULL;
4402        QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
4403
4404        if (err == NULL) {
4405            GString *is_realized = qobject_to_json_pretty(obj, true);
4406            /* virtio device is NOT realized, remove it from list */
4407            if (!strncmp(is_realized->str, "false", 4)) {
4408                QTAILQ_REMOVE(&virtio_list, vdev, next);
4409            } else {
4410                node = g_new0(VirtioInfoList, 1);
4411                node->value = g_new(VirtioInfo, 1);
4412                node->value->path = g_strdup(dev->canonical_path);
4413                node->value->name = g_strdup(vdev->name);
4414                QAPI_LIST_PREPEND(list, node->value);
4415            }
4416            g_string_free(is_realized, true);
4417        }
4418        qobject_unref(obj);
4419    }
4420
4421    return list;
4422}
4423
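/*
 * Look up a realized VirtIODevice by QOM canonical path; devices that are
 * no longer realized (or have vanished from the QOM tree) are pruned from
 * virtio_list as a side effect.
 */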
4424static VirtIODevice *virtio_device_find(const char *path)
4425{
4426    VirtIODevice *vdev;
4427
4428    QTAILQ_FOREACH(vdev, &virtio_list, next) {
4429        DeviceState *dev = DEVICE(vdev);
4430
4431        if (strcmp(dev->canonical_path, path) != 0) {
4432            continue;
4433        }
4434
4435        Error *err = NULL;
4436        QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
4437        if (err == NULL) {
4438            GString *is_realized = qobject_to_json_pretty(obj, true);
4439            /* virtio device is NOT realized, remove it from list */
4440            if (!strncmp(is_realized->str, "false", 4)) {
4441                g_string_free(is_realized, true);
4442                qobject_unref(obj);
4443                QTAILQ_REMOVE(&virtio_list, vdev, next);
4444                return NULL;
4445            }
4446            g_string_free(is_realized, true);
4447        } else {
4448            /* virtio device doesn't exist in QOM tree */
4449            QTAILQ_REMOVE(&virtio_list, vdev, next);
4450            qobject_unref(obj);
4451            return NULL;
4452        }
4453        /* device exists in QOM tree & is realized */
4454        qobject_unref(obj);
4455        return vdev;
4456    }
4457    return NULL;
4458}
4459
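/*
 * Build a strList of human-readable names for the bits present in "bitmap",
 * clearing each recognised bit as it is consumed; whatever remains afterwards
 * is reported by the callers as unknown statuses/features.
 */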
4460#define CONVERT_FEATURES(type, map, is_status, bitmap)   \
4461    ({                                                   \
4462        type *list = NULL;                               \
4463        type *node;                                      \
4464        for (i = 0; map[i].virtio_bit != -1; i++) {      \
4465            if (is_status) {                             \
4466                bit = map[i].virtio_bit;                 \
4467            }                                            \
4468            else {                                       \
4469                bit = 1ULL << map[i].virtio_bit;         \
4470            }                                            \
4471            if ((bitmap & bit) == 0) {                   \
4472                continue;                                \
4473            }                                            \
4474            node = g_new0(type, 1);                      \
4475            node->value = g_strdup(map[i].feature_desc); \
4476            node->next = list;                           \
4477            list = node;                                 \
4478            bitmap ^= bit;                               \
4479        }                                                \
4480        list;                                            \
4481    })
4482
4483static VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap)
4484{
4485    VirtioDeviceStatus *status;
4486    uint8_t bit;
4487    int i;
4488
4489    status = g_new0(VirtioDeviceStatus, 1);
4490    status->statuses = CONVERT_FEATURES(strList, virtio_config_status_map,
4491                                        1, bitmap);
4492    status->has_unknown_statuses = bitmap != 0;
4493    if (status->has_unknown_statuses) {
4494        status->unknown_statuses = bitmap;
4495    }
4496
4497    return status;
4498}
4499
4500static VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap)
4501{
4502    VhostDeviceProtocols *vhu_protocols;
4503    uint64_t bit;
4504    int i;
4505
4506    vhu_protocols = g_new0(VhostDeviceProtocols, 1);
4507    vhu_protocols->protocols =
4508                    CONVERT_FEATURES(strList,
4509                                     vhost_user_protocol_map, 0, bitmap);
4510    vhu_protocols->has_unknown_protocols = bitmap != 0;
4511    if (vhu_protocols->has_unknown_protocols) {
4512        vhu_protocols->unknown_protocols = bitmap;
4513    }
4514
4515    return vhu_protocols;
4516}
4517
4518static VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id,
4519                                                 uint64_t bitmap)
4520{
4521    VirtioDeviceFeatures *features;
4522    uint64_t bit;
4523    int i;
4524
4525    features = g_new0(VirtioDeviceFeatures, 1);
4526    features->has_dev_features = true;
4527
4528    /* transport features */
4529    features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0,
4530                                            bitmap);
4531
4532    /* device features */
4533    switch (device_id) {
4534#ifdef CONFIG_VIRTIO_SERIAL
4535    case VIRTIO_ID_CONSOLE:
4536        features->dev_features =
4537            CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap);
4538        break;
4539#endif
4540#ifdef CONFIG_VIRTIO_BLK
4541    case VIRTIO_ID_BLOCK:
4542        features->dev_features =
4543            CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap);
4544        break;
4545#endif
4546#ifdef CONFIG_VIRTIO_GPU
4547    case VIRTIO_ID_GPU:
4548        features->dev_features =
4549            CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap);
4550        break;
4551#endif
4552#ifdef CONFIG_VIRTIO_NET
4553    case VIRTIO_ID_NET:
4554        features->dev_features =
4555            CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap);
4556        break;
4557#endif
4558#ifdef CONFIG_VIRTIO_SCSI
4559    case VIRTIO_ID_SCSI:
4560        features->dev_features =
4561            CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap);
4562        break;
4563#endif
4564#ifdef CONFIG_VIRTIO_BALLOON
4565    case VIRTIO_ID_BALLOON:
4566        features->dev_features =
4567            CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap);
4568        break;
4569#endif
4570#ifdef CONFIG_VIRTIO_IOMMU
4571    case VIRTIO_ID_IOMMU:
4572        features->dev_features =
4573            CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap);
4574        break;
4575#endif
4576#ifdef CONFIG_VIRTIO_INPUT
4577    case VIRTIO_ID_INPUT:
4578        features->dev_features =
4579            CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap);
4580        break;
4581#endif
4582#ifdef CONFIG_VHOST_USER_FS
4583    case VIRTIO_ID_FS:
4584        features->dev_features =
4585            CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap);
4586        break;
4587#endif
4588#ifdef CONFIG_VHOST_VSOCK
4589    case VIRTIO_ID_VSOCK:
4590        features->dev_features =
4591            CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap);
4592        break;
4593#endif
4594#ifdef CONFIG_VIRTIO_CRYPTO
4595    case VIRTIO_ID_CRYPTO:
4596        features->dev_features =
4597            CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap);
4598        break;
4599#endif
4600#ifdef CONFIG_VIRTIO_MEM
4601    case VIRTIO_ID_MEM:
4602        features->dev_features =
4603            CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap);
4604        break;
4605#endif
4606#ifdef CONFIG_VIRTIO_I2C_ADAPTER
4607    case VIRTIO_ID_I2C_ADAPTER:
4608        features->dev_features =
4609            CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap);
4610        break;
4611#endif
4612#ifdef CONFIG_VIRTIO_RNG
4613    case VIRTIO_ID_RNG:
4614        features->dev_features =
4615            CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap);
4616        break;
4617#endif
4618    /* No features */
4619    case VIRTIO_ID_9P:
4620    case VIRTIO_ID_PMEM:
4621    case VIRTIO_ID_IOMEM:
4622    case VIRTIO_ID_RPMSG:
4623    case VIRTIO_ID_CLOCK:
4624    case VIRTIO_ID_MAC80211_WLAN:
4625    case VIRTIO_ID_MAC80211_HWSIM:
4626    case VIRTIO_ID_RPROC_SERIAL:
4627    case VIRTIO_ID_MEMORY_BALLOON:
4628    case VIRTIO_ID_CAIF:
4629    case VIRTIO_ID_SIGNAL_DIST:
4630    case VIRTIO_ID_PSTORE:
4631    case VIRTIO_ID_SOUND:
4632    case VIRTIO_ID_BT:
4633    case VIRTIO_ID_RPMB:
4634    case VIRTIO_ID_VIDEO_ENCODER:
4635    case VIRTIO_ID_VIDEO_DECODER:
4636    case VIRTIO_ID_SCMI:
4637    case VIRTIO_ID_NITRO_SEC_MOD:
4638    case VIRTIO_ID_WATCHDOG:
4639    case VIRTIO_ID_CAN:
4640    case VIRTIO_ID_DMABUF:
4641    case VIRTIO_ID_PARAM_SERV:
4642    case VIRTIO_ID_AUDIO_POLICY:
4643    case VIRTIO_ID_GPIO:
4644        break;
4645    default:
4646        g_assert_not_reached();
4647    }
4648
4649    features->has_unknown_dev_features = bitmap != 0;
4650    if (features->has_unknown_dev_features) {
4651        features->unknown_dev_features = bitmap;
4652    }
4653
4654    return features;
4655}
4656
4657VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
4658{
4659    VirtIODevice *vdev;
4660    VirtioStatus *status;
4661
4662    vdev = virtio_device_find(path);
4663    if (vdev == NULL) {
4664        error_setg(errp, "Path %s is not a VirtIODevice", path);
4665        return NULL;
4666    }
4667
4668    status = g_new0(VirtioStatus, 1);
4669    status->name = g_strdup(vdev->name);
4670    status->device_id = vdev->device_id;
4671    status->vhost_started = vdev->vhost_started;
4672    status->guest_features = qmp_decode_features(vdev->device_id,
4673                                                 vdev->guest_features);
4674    status->host_features = qmp_decode_features(vdev->device_id,
4675                                                vdev->host_features);
4676    status->backend_features = qmp_decode_features(vdev->device_id,
4677                                                   vdev->backend_features);
4678
4679    switch (vdev->device_endian) {
4680    case VIRTIO_DEVICE_ENDIAN_LITTLE:
4681        status->device_endian = g_strdup("little");
4682        break;
4683    case VIRTIO_DEVICE_ENDIAN_BIG:
4684        status->device_endian = g_strdup("big");
4685        break;
4686    default:
4687        status->device_endian = g_strdup("unknown");
4688        break;
4689    }
4690
4691    status->num_vqs = virtio_get_num_queues(vdev);
4692    status->status = qmp_decode_status(vdev->status);
4693    status->isr = vdev->isr;
4694    status->queue_sel = vdev->queue_sel;
4695    status->vm_running = vdev->vm_running;
4696    status->broken = vdev->broken;
4697    status->disabled = vdev->disabled;
4698    status->use_started = vdev->use_started;
4699    status->started = vdev->started;
4700    status->start_on_kick = vdev->start_on_kick;
4701    status->disable_legacy_check = vdev->disable_legacy_check;
4702    status->bus_name = g_strdup(vdev->bus_name);
4703    status->use_guest_notifier_mask = vdev->use_guest_notifier_mask;
4704    status->has_vhost_dev = vdev->vhost_started;
4705
4706    if (vdev->vhost_started) {
4707        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4708        struct vhost_dev *hdev = vdc->get_vhost(vdev);
4709
4710        status->vhost_dev = g_new0(VhostStatus, 1);
4711        status->vhost_dev->n_mem_sections = hdev->n_mem_sections;
4712        status->vhost_dev->n_tmp_sections = hdev->n_tmp_sections;
4713        status->vhost_dev->nvqs = hdev->nvqs;
4714        status->vhost_dev->vq_index = hdev->vq_index;
4715        status->vhost_dev->features =
4716            qmp_decode_features(vdev->device_id, hdev->features);
4717        status->vhost_dev->acked_features =
4718            qmp_decode_features(vdev->device_id, hdev->acked_features);
4719        status->vhost_dev->backend_features =
4720            qmp_decode_features(vdev->device_id, hdev->backend_features);
4721        status->vhost_dev->protocol_features =
4722            qmp_decode_protocols(hdev->protocol_features);
4723        status->vhost_dev->max_queues = hdev->max_queues;
4724        status->vhost_dev->backend_cap = hdev->backend_cap;
4725        status->vhost_dev->log_enabled = hdev->log_enabled;
4726        status->vhost_dev->log_size = hdev->log_size;
4727    }
4728
4729    return status;
4730}
4731
4732VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path,
4733                                                            uint16_t queue,
4734                                                            Error **errp)
4735{
4736    VirtIODevice *vdev;
4737    VirtVhostQueueStatus *status;
4738
4739    vdev = virtio_device_find(path);
4740    if (vdev == NULL) {
4741        error_setg(errp, "Path %s is not a VirtIODevice", path);
4742        return NULL;
4743    }
4744
4745    if (!vdev->vhost_started) {
4746        error_setg(errp, "vhost device has not started yet");
4747        return NULL;
4748    }
4749
4750    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4751    struct vhost_dev *hdev = vdc->get_vhost(vdev);
4752
4753    if (queue < hdev->vq_index || queue >= hdev->vq_index + hdev->nvqs) {
4754        error_setg(errp, "Invalid vhost virtqueue number %d", queue);
4755        return NULL;
4756    }
4757
4758    status = g_new0(VirtVhostQueueStatus, 1);
4759    status->name = g_strdup(vdev->name);
4760    status->kick = hdev->vqs[queue].kick;
4761    status->call = hdev->vqs[queue].call;
4762    status->desc = (uintptr_t)hdev->vqs[queue].desc;
4763    status->avail = (uintptr_t)hdev->vqs[queue].avail;
4764    status->used = (uintptr_t)hdev->vqs[queue].used;
4765    status->num = hdev->vqs[queue].num;
4766    status->desc_phys = hdev->vqs[queue].desc_phys;
4767    status->desc_size = hdev->vqs[queue].desc_size;
4768    status->avail_phys = hdev->vqs[queue].avail_phys;
4769    status->avail_size = hdev->vqs[queue].avail_size;
4770    status->used_phys = hdev->vqs[queue].used_phys;
4771    status->used_size = hdev->vqs[queue].used_size;
4772
4773    return status;
4774}
4775
4776VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
4777                                                 uint16_t queue,
4778                                                 Error **errp)
4779{
4780    VirtIODevice *vdev;
4781    VirtQueueStatus *status;
4782
4783    vdev = virtio_device_find(path);
4784    if (vdev == NULL) {
4785        error_setg(errp, "Path %s is not a VirtIODevice", path);
4786        return NULL;
4787    }
4788
4789    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4790        error_setg(errp, "Invalid virtqueue number %d", queue);
4791        return NULL;
4792    }
4793
4794    status = g_new0(VirtQueueStatus, 1);
4795    status->name = g_strdup(vdev->name);
4796    status->queue_index = vdev->vq[queue].queue_index;
4797    status->inuse = vdev->vq[queue].inuse;
4798    status->vring_num = vdev->vq[queue].vring.num;
4799    status->vring_num_default = vdev->vq[queue].vring.num_default;
4800    status->vring_align = vdev->vq[queue].vring.align;
4801    status->vring_desc = vdev->vq[queue].vring.desc;
4802    status->vring_avail = vdev->vq[queue].vring.avail;
4803    status->vring_used = vdev->vq[queue].vring.used;
4804    status->used_idx = vdev->vq[queue].used_idx;
4805    status->signalled_used = vdev->vq[queue].signalled_used;
4806    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
4807
4808    if (vdev->vhost_started) {
4809        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4810        struct vhost_dev *hdev = vdc->get_vhost(vdev);
4811
4812        /* Check whether this vq index is also handled by vhost */
4813        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4814            status->has_last_avail_idx = true;
4815
4816            int vhost_vq_index =
4817                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4818            struct vhost_vring_state state = {
4819                .index = vhost_vq_index,
4820            };
4821
4822            status->last_avail_idx =
4823                hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4824        }
4825    } else {
4826        status->has_shadow_avail_idx = true;
4827        status->has_last_avail_idx = true;
4828        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4829        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4830    }
4831
4832    return status;
4833}
4834
4835static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4836{
4837    strList *list = NULL;
4838    strList *node;
4839    int i;
4840
4841    struct {
4842        uint16_t flag;
4843        const char *value;
4844    } map[] = {
4845        { VRING_DESC_F_NEXT, "next" },
4846        { VRING_DESC_F_WRITE, "write" },
4847        { VRING_DESC_F_INDIRECT, "indirect" },
4848        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4849        { 1 << VRING_PACKED_DESC_F_USED, "used" },
4850        { 0, "" }
4851    };
4852
4853    for (i = 0; map[i].flag; i++) {
4854        if ((map[i].flag & flags) == 0) {
4855            continue;
4856        }
4857        node = g_malloc0(sizeof(strList));
4858        node->value = g_strdup(map[i].value);
4859        node->next = list;
4860        list = node;
4861    }
4862
4863    return list;
4864}
4865
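/*
 * Inspect (without popping) the descriptor chain at the avail ring head, or
 * at an explicitly requested ring index, for the x-query-virtio-queue-element
 * QMP command; only split rings are supported.
 */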
4866VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4867                                                     uint16_t queue,
4868                                                     bool has_index,
4869                                                     uint16_t index,
4870                                                     Error **errp)
4871{
4872    VirtIODevice *vdev;
4873    VirtQueue *vq;
4874    VirtioQueueElement *element = NULL;
4875
4876    vdev = virtio_device_find(path);
4877    if (vdev == NULL) {
4878        error_setg(errp, "Path %s is not a VirtIO device", path);
4879        return NULL;
4880    }
4881
4882    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4883        error_setg(errp, "Invalid virtqueue number %d", queue);
4884        return NULL;
4885    }
4886    vq = &vdev->vq[queue];
4887
4888    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4889        error_setg(errp, "Packed ring not supported");
4890        return NULL;
4891    } else {
4892        unsigned int head, i, max;
4893        VRingMemoryRegionCaches *caches;
4894        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
4895        MemoryRegionCache *desc_cache;
4896        VRingDesc desc;
4897        VirtioRingDescList *list = NULL;
4898        VirtioRingDescList *node;
4899        int rc, ndescs;
4900
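            /*
             * The vring MemoryRegionCaches are published under RCU; hold
             * the read lock while the rings are read through them.
             */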
4901        RCU_READ_LOCK_GUARD();
4902
4903        max = vq->vring.num;
4904
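            /*
             * Select the avail ring slot to decode: by default the next
             * slot the device would consume (last_avail_idx), otherwise
             * the caller-supplied index.  Either way it wraps modulo
             * vring.num.
             */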
4905        if (!has_index) {
4906            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4907        } else {
4908            head = vring_avail_ring(vq, index % vq->vring.num);
4909        }
4910        i = head;
4911
4912        caches = vring_get_region_caches(vq);
4913        if (!caches) {
4914            error_setg(errp, "Region caches not initialized");
4915            return NULL;
4916        }
4917        if (caches->desc.len < max * sizeof(VRingDesc)) {
4918            error_setg(errp, "Cannot map descriptor ring");
4919            return NULL;
4920        }
4921
4922        desc_cache = &caches->desc;
4923        vring_split_desc_read(vdev, &desc, desc_cache, i);
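            /*
             * An INDIRECT head descriptor points to a separate descriptor
             * table in guest memory; map that table and restart the walk
             * from its first entry.
             */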
4924        if (desc.flags & VRING_DESC_F_INDIRECT) {
4925            int64_t len;
4926            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4927                                           desc.addr, desc.len, false);
4928            desc_cache = &indirect_desc_cache;
4929            if (len < desc.len) {
4930                error_setg(errp, "Cannot map indirect buffer");
4931                goto done;
4932            }
4933
4934            max = desc.len / sizeof(VRingDesc);
4935            i = 0;
4936            vring_split_desc_read(vdev, &desc, desc_cache, i);
4937        }
4938
4939        element = g_new0(VirtioQueueElement, 1);
4940        element->avail = g_new0(VirtioRingAvail, 1);
4941        element->used = g_new0(VirtioRingUsed, 1);
4942        element->name = g_strdup(vdev->name);
4943        element->index = head;
4944        element->avail->flags = vring_avail_flags(vq);
4945        element->avail->idx = vring_avail_idx(vq);
4946        element->avail->ring = head;
4947        element->used->flags = vring_used_flags(vq);
4948        element->used->idx = vring_used_idx(vq);
4949        ndescs = 0;
4950
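            /*
             * Walk the descriptor chain, prepending one VirtioRingDesc
             * node per descriptor (so the resulting list is in reverse
             * chain order) and stopping at end-of-chain or after max
             * descriptors to guard against a looping chain.
             */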
4951        do {
4952            /* A buggy driver may produce an infinite loop */
4953            if (ndescs >= max) {
4954                break;
4955            }
4956            node = g_new0(VirtioRingDescList, 1);
4957            node->value = g_new0(VirtioRingDesc, 1);
4958            node->value->addr = desc.addr;
4959            node->value->len = desc.len;
4960            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4961            node->next = list;
4962            list = node;
4963
4964            ndescs++;
4965            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4966                                                max, &i);
4967        } while (rc == VIRTQUEUE_READ_DESC_MORE);
4968        element->descs = list;
4969done:
4970        address_space_cache_destroy(&indirect_desc_cache);
4971    }
4972
4973    return element;
4974}
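    /*
     * Illustrative only: the rough shape of an exchange with the element
     * query above, inferred from the QAPI types it fills in.  The QOM
     * path and all values below are made up.
     *
     *   -> { "execute": "x-query-virtio-queue-element",
     *        "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
     *                       "queue": 0 } }
     *   <- { "return": { "name": "virtio-net", "index": 5,
     *                    "avail": { "flags": 0, "idx": 8, "ring": 5 },
     *                    "used": { "flags": 0, "idx": 7 },
     *                    "descs": [ { "addr": 84934656, "len": 1536,
     *                                 "flags": [ "write" ] } ] } }
     */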
4975
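    /*
     * TYPE_VIRTIO_DEVICE is the abstract QOM base class every virtio
     * device model derives from; it is registered below and only ever
     * instantiated through its concrete subclasses.
     */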
4976static const TypeInfo virtio_device_info = {
4977    .name = TYPE_VIRTIO_DEVICE,
4978    .parent = TYPE_DEVICE,
4979    .instance_size = sizeof(VirtIODevice),
4980    .class_init = virtio_device_class_init,
4981    .instance_finalize = virtio_device_instance_finalize,
4982    .abstract = true,
4983    .class_size = sizeof(VirtioDeviceClass),
4984};
4985
4986static void virtio_register_types(void)
4987{
4988    type_register_static(&virtio_device_info);
4989}
4990
4991type_init(virtio_register_types)
4992