qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "qapi/qmp/qdict.h"
  17#include "qapi/qapi-commands-virtio.h"
  18#include "qapi/qapi-commands-qom.h"
  19#include "qapi/qapi-visit-virtio.h"
  20#include "qapi/qmp/qjson.h"
  21#include "cpu.h"
  22#include "trace.h"
  23#include "qemu/error-report.h"
  24#include "qemu/log.h"
  25#include "qemu/main-loop.h"
  26#include "qemu/module.h"
  27#include "qom/object_interfaces.h"
  28#include "hw/virtio/virtio.h"
  29#include "migration/qemu-file-types.h"
  30#include "qemu/atomic.h"
  31#include "hw/virtio/virtio-bus.h"
  32#include "hw/qdev-properties.h"
  33#include "hw/virtio/virtio-access.h"
  34#include "sysemu/dma.h"
  35#include "sysemu/runstate.h"
  36#include "standard-headers/linux/virtio_ids.h"
  37#include "standard-headers/linux/vhost_types.h"
  38#include "standard-headers/linux/virtio_blk.h"
  39#include "standard-headers/linux/virtio_console.h"
  40#include "standard-headers/linux/virtio_gpu.h"
  41#include "standard-headers/linux/virtio_net.h"
  42#include "standard-headers/linux/virtio_scsi.h"
  43#include "standard-headers/linux/virtio_i2c.h"
  44#include "standard-headers/linux/virtio_balloon.h"
  45#include "standard-headers/linux/virtio_iommu.h"
  46#include "standard-headers/linux/virtio_mem.h"
  47#include "standard-headers/linux/virtio_vsock.h"
  48#include CONFIG_DEVICES
  49
  50/* QAPI list of realized VirtIODevices */
  51static QTAILQ_HEAD(, VirtIODevice) virtio_list;
  52
  53/*
  54 * Maximum size of virtio device config space
  55 */
  56#define VHOST_USER_MAX_CONFIG_SIZE 256
  57
  58#define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \
  59    { .virtio_bit = name, .feature_desc = desc }
  60
  61enum VhostUserProtocolFeature {
  62    VHOST_USER_PROTOCOL_F_MQ = 0,
  63    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
  64    VHOST_USER_PROTOCOL_F_RARP = 2,
  65    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
  66    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
  67    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
  68    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  69    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  70    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
  71    VHOST_USER_PROTOCOL_F_CONFIG = 9,
  72    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
  73    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
  74    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
  75    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
  76    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
  77    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
  78    VHOST_USER_PROTOCOL_F_MAX
  79};
  80
  81/* Virtio transport features mapping */
  82static qmp_virtio_feature_map_t virtio_transport_map[] = {
  83    /* Virtio device transport features */
  84#ifndef VIRTIO_CONFIG_NO_LEGACY
  85    FEATURE_ENTRY(VIRTIO_F_NOTIFY_ON_EMPTY, \
  86            "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. "
  87            "descs. on VQ"),
  88    FEATURE_ENTRY(VIRTIO_F_ANY_LAYOUT, \
  89            "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts"),
  90#endif /* !VIRTIO_CONFIG_NO_LEGACY */
  91    FEATURE_ENTRY(VIRTIO_F_VERSION_1, \
  92            "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"),
  93    FEATURE_ENTRY(VIRTIO_F_IOMMU_PLATFORM, \
  94            "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform"),
  95    FEATURE_ENTRY(VIRTIO_F_RING_PACKED, \
  96            "VIRTIO_F_RING_PACKED: Device supports packed VQ layout"),
  97    FEATURE_ENTRY(VIRTIO_F_IN_ORDER, \
  98            "VIRTIO_F_IN_ORDER: Device uses buffers in same order as made "
  99            "available by driver"),
 100    FEATURE_ENTRY(VIRTIO_F_ORDER_PLATFORM, \
 101            "VIRTIO_F_ORDER_PLATFORM: Memory accesses ordered by platform"),
 102    FEATURE_ENTRY(VIRTIO_F_SR_IOV, \
 103            "VIRTIO_F_SR_IOV: Device supports single root I/O virtualization"),
 104    /* Virtio ring transport features */
 105    FEATURE_ENTRY(VIRTIO_RING_F_INDIRECT_DESC, \
 106            "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported"),
 107    FEATURE_ENTRY(VIRTIO_RING_F_EVENT_IDX, \
 108            "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled"),
 109    { -1, "" }
 110};
 111
 112/* Vhost-user protocol features mapping */
 113static qmp_virtio_feature_map_t vhost_user_protocol_map[] = {
 114    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_MQ, \
 115            "VHOST_USER_PROTOCOL_F_MQ: Multiqueue protocol supported"),
 116    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_LOG_SHMFD, \
 117            "VHOST_USER_PROTOCOL_F_LOG_SHMFD: Shared log memory fd supported"),
 118    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RARP, \
 119            "VHOST_USER_PROTOCOL_F_RARP: Vhost-user back-end RARP broadcasting "
 120            "supported"),
 121    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_REPLY_ACK, \
 122            "VHOST_USER_PROTOCOL_F_REPLY_ACK: Requested operation status ack. "
 123            "supported"),
 124    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU, \
 125            "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"),
 126    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ, \
 127            "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated "
 128            "requests supported"),
 129    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, \
 130            "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy "
 131            "devices supported"),
 132    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CRYPTO_SESSION, \
 133            "VHOST_USER_PROTOCOL_F_CRYPTO_SESSION: Session creation for crypto "
 134            "operations supported"),
 135    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_PAGEFAULT, \
 136            "VHOST_USER_PROTOCOL_F_PAGEFAULT: Request servicing on userfaultfd "
 137            "for accessed pages supported"),
 138    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG, \
 139            "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio "
 140            "device configuration space supported"),
 141    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD, \
 142            "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication "
 143            "channel supported"),
 144    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \
 145            "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified "
 146            "VQs supported"),
 147    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, \
 148            "VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: Shared inflight I/O buffers "
 149            "supported"),
 150    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RESET_DEVICE, \
 151            "VHOST_USER_PROTOCOL_F_RESET_DEVICE: Disabling all rings and "
 152            "resetting internal device state supported"),
 153    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS, \
 154            "VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS: In-band messaging "
 155            "supported"),
 156    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, \
 157            "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for "
 158            "memory slots supported"),
 159    { -1, "" }
 160};
 161
 162/* virtio device configuration statuses */
 163static qmp_virtio_feature_map_t virtio_config_status_map[] = {
 164    FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER_OK, \
 165            "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"),
 166    FEATURE_ENTRY(VIRTIO_CONFIG_S_FEATURES_OK, \
 167            "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete"),
 168    FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER, \
 169            "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device"),
 170    FEATURE_ENTRY(VIRTIO_CONFIG_S_NEEDS_RESET, \
 171            "VIRTIO_CONFIG_S_NEEDS_RESET: Irrecoverable error, device needs "
 172            "reset"),
 173    FEATURE_ENTRY(VIRTIO_CONFIG_S_FAILED, \
 174            "VIRTIO_CONFIG_S_FAILED: Error in guest, device failed"),
 175    FEATURE_ENTRY(VIRTIO_CONFIG_S_ACKNOWLEDGE, \
 176            "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found"),
 177    { -1, "" }
 178};
 179
 180/* virtio-blk features mapping */
 181qmp_virtio_feature_map_t virtio_blk_feature_map[] = {
 182    FEATURE_ENTRY(VIRTIO_BLK_F_SIZE_MAX, \
 183            "VIRTIO_BLK_F_SIZE_MAX: Max segment size is size_max"),
 184    FEATURE_ENTRY(VIRTIO_BLK_F_SEG_MAX, \
 185            "VIRTIO_BLK_F_SEG_MAX: Max segments in a request is seg_max"),
 186    FEATURE_ENTRY(VIRTIO_BLK_F_GEOMETRY, \
 187            "VIRTIO_BLK_F_GEOMETRY: Legacy geometry available"),
 188    FEATURE_ENTRY(VIRTIO_BLK_F_RO, \
 189            "VIRTIO_BLK_F_RO: Device is read-only"),
 190    FEATURE_ENTRY(VIRTIO_BLK_F_BLK_SIZE, \
 191            "VIRTIO_BLK_F_BLK_SIZE: Block size of disk available"),
 192    FEATURE_ENTRY(VIRTIO_BLK_F_TOPOLOGY, \
 193            "VIRTIO_BLK_F_TOPOLOGY: Topology information available"),
 194    FEATURE_ENTRY(VIRTIO_BLK_F_MQ, \
 195            "VIRTIO_BLK_F_MQ: Multiqueue supported"),
 196    FEATURE_ENTRY(VIRTIO_BLK_F_DISCARD, \
 197            "VIRTIO_BLK_F_DISCARD: Discard command supported"),
 198    FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \
 199            "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"),
 200#ifndef VIRTIO_BLK_NO_LEGACY
 201    FEATURE_ENTRY(VIRTIO_BLK_F_BARRIER, \
 202            "VIRTIO_BLK_F_BARRIER: Request barriers supported"),
 203    FEATURE_ENTRY(VIRTIO_BLK_F_SCSI, \
 204            "VIRTIO_BLK_F_SCSI: SCSI packet commands supported"),
 205    FEATURE_ENTRY(VIRTIO_BLK_F_FLUSH, \
 206            "VIRTIO_BLK_F_FLUSH: Flush command supported"),
 207    FEATURE_ENTRY(VIRTIO_BLK_F_CONFIG_WCE, \
 208            "VIRTIO_BLK_F_CONFIG_WCE: Cache writeback and writethrough modes "
 209            "supported"),
 210#endif /* !VIRTIO_BLK_NO_LEGACY */
 211    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 212            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 213    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 214            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 215            "negotiation supported"),
 216    { -1, "" }
 217};
 218
 219/* virtio-serial features mapping */
 220qmp_virtio_feature_map_t virtio_serial_feature_map[] = {
 221    FEATURE_ENTRY(VIRTIO_CONSOLE_F_SIZE, \
 222            "VIRTIO_CONSOLE_F_SIZE: Host providing console size"),
 223    FEATURE_ENTRY(VIRTIO_CONSOLE_F_MULTIPORT, \
 224            "VIRTIO_CONSOLE_F_MULTIPORT: Multiple ports for device supported"),
 225    FEATURE_ENTRY(VIRTIO_CONSOLE_F_EMERG_WRITE, \
 226            "VIRTIO_CONSOLE_F_EMERG_WRITE: Emergency write supported"),
 227    { -1, "" }
 228};
 229
 230/* virtio-gpu features mapping */
 231qmp_virtio_feature_map_t virtio_gpu_feature_map[] = {
 232    FEATURE_ENTRY(VIRTIO_GPU_F_VIRGL, \
 233            "VIRTIO_GPU_F_VIRGL: Virgl 3D mode supported"),
 234    FEATURE_ENTRY(VIRTIO_GPU_F_EDID, \
 235            "VIRTIO_GPU_F_EDID: EDID metadata supported"),
 236    FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_UUID, \
 237            "VIRTIO_GPU_F_RESOURCE_UUID: Resource UUID assigning supported"),
 238    FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_BLOB, \
 239            "VIRTIO_GPU_F_RESOURCE_BLOB: Size-based blob resources supported"),
 240    FEATURE_ENTRY(VIRTIO_GPU_F_CONTEXT_INIT, \
 241            "VIRTIO_GPU_F_CONTEXT_INIT: Context types and synchronization "
 242            "timelines supported"),
 243    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 244            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 245    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 246            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 247            "negotiation supported"),
 248    { -1, "" }
 249};
 250
 251/* virtio-input features mapping */
 252qmp_virtio_feature_map_t virtio_input_feature_map[] = {
 253    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 254            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 255    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 256            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 257            "negotiation supported"),
 258    { -1, "" }
 259};
 260
 261/* virtio-net features mapping */
 262qmp_virtio_feature_map_t virtio_net_feature_map[] = {
 263    FEATURE_ENTRY(VIRTIO_NET_F_CSUM, \
 264            "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum "
 265            "supported"),
 266    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_CSUM, \
 267            "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial "
 268            "checksum supported"),
 269    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
 270            "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading "
 271            "reconfig. supported"),
 272    FEATURE_ENTRY(VIRTIO_NET_F_MTU, \
 273            "VIRTIO_NET_F_MTU: Device max MTU reporting supported"),
 274    FEATURE_ENTRY(VIRTIO_NET_F_MAC, \
 275            "VIRTIO_NET_F_MAC: Device has given MAC address"),
 276    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO4, \
 277            "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4"),
 278    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO6, \
 279            "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6"),
 280    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ECN, \
 281            "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN"),
 282    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UFO, \
 283            "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO"),
 284    FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO4, \
 285            "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4"),
 286    FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO6, \
 287            "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6"),
 288    FEATURE_ENTRY(VIRTIO_NET_F_HOST_ECN, \
 289            "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN"),
 290    FEATURE_ENTRY(VIRTIO_NET_F_HOST_UFO, \
 291            "VIRTIO_NET_F_HOST_UFO: Device can receive UFO"),
 292    FEATURE_ENTRY(VIRTIO_NET_F_MRG_RXBUF, \
 293            "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"),
 294    FEATURE_ENTRY(VIRTIO_NET_F_STATUS, \
 295            "VIRTIO_NET_F_STATUS: Configuration status field available"),
 296    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VQ, \
 297            "VIRTIO_NET_F_CTRL_VQ: Control channel available"),
 298    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX, \
 299            "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported"),
 300    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VLAN, \
 301            "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported"),
 302    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX_EXTRA, \
 303            "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported"),
 304    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ANNOUNCE, \
 305            "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets "
 306            "supported"),
 307    FEATURE_ENTRY(VIRTIO_NET_F_MQ, \
 308            "VIRTIO_NET_F_MQ: Multiqueue with automatic receive steering "
 309            "supported"),
 310    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR, \
 311            "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control "
 312            "channel"),
 313    FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT, \
 314            "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"),
 315    FEATURE_ENTRY(VIRTIO_NET_F_RSS, \
 316            "VIRTIO_NET_F_RSS: RSS RX steering supported"),
 317    FEATURE_ENTRY(VIRTIO_NET_F_RSC_EXT, \
 318            "VIRTIO_NET_F_RSC_EXT: Extended coalescing info supported"),
 319    FEATURE_ENTRY(VIRTIO_NET_F_STANDBY, \
 320            "VIRTIO_NET_F_STANDBY: Device acting as standby for primary "
 321            "device with same MAC addr. supported"),
 322    FEATURE_ENTRY(VIRTIO_NET_F_SPEED_DUPLEX, \
  323            "VIRTIO_NET_F_SPEED_DUPLEX: Device sets link speed and duplex"),
 324#ifndef VIRTIO_NET_NO_LEGACY
 325    FEATURE_ENTRY(VIRTIO_NET_F_GSO, \
 326            "VIRTIO_NET_F_GSO: Handling GSO-type packets supported"),
 327#endif /* !VIRTIO_NET_NO_LEGACY */
 328    FEATURE_ENTRY(VHOST_NET_F_VIRTIO_NET_HDR, \
 329            "VHOST_NET_F_VIRTIO_NET_HDR: Virtio-net headers for RX and TX "
 330            "packets supported"),
 331    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 332            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 333    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 334            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 335            "negotiation supported"),
 336    { -1, "" }
 337};
 338
 339/* virtio-scsi features mapping */
 340qmp_virtio_feature_map_t virtio_scsi_feature_map[] = {
 341    FEATURE_ENTRY(VIRTIO_SCSI_F_INOUT, \
 342            "VIRTIO_SCSI_F_INOUT: Requests including read and writable data "
  343            "buffers supported"),
 344    FEATURE_ENTRY(VIRTIO_SCSI_F_HOTPLUG, \
 345            "VIRTIO_SCSI_F_HOTPLUG: Reporting and handling hot-plug events "
 346            "supported"),
 347    FEATURE_ENTRY(VIRTIO_SCSI_F_CHANGE, \
 348            "VIRTIO_SCSI_F_CHANGE: Reporting and handling LUN changes "
 349            "supported"),
 350    FEATURE_ENTRY(VIRTIO_SCSI_F_T10_PI, \
 351            "VIRTIO_SCSI_F_T10_PI: T10 info included in request header"),
 352    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 353            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 354    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 355            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 356            "negotiation supported"),
 357    { -1, "" }
 358};
 359
 360/* virtio/vhost-user-fs features mapping */
 361qmp_virtio_feature_map_t virtio_fs_feature_map[] = {
 362    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 363            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 364    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 365            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 366            "negotiation supported"),
 367    { -1, "" }
 368};
 369
 370/* virtio/vhost-user-i2c features mapping */
 371qmp_virtio_feature_map_t virtio_i2c_feature_map[] = {
 372    FEATURE_ENTRY(VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, \
  373            "VIRTIO_I2C_F_ZERO_LENGTH_REQUEST: Zero length requests supported"),
 374    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 375            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 376    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 377            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 378            "negotiation supported"),
 379    { -1, "" }
 380};
 381
 382/* virtio/vhost-vsock features mapping */
 383qmp_virtio_feature_map_t virtio_vsock_feature_map[] = {
 384    FEATURE_ENTRY(VIRTIO_VSOCK_F_SEQPACKET, \
 385            "VIRTIO_VSOCK_F_SEQPACKET: SOCK_SEQPACKET supported"),
 386    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 387            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 388    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 389            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 390            "negotiation supported"),
 391    { -1, "" }
 392};
 393
 394/* virtio-balloon features mapping */
 395qmp_virtio_feature_map_t virtio_balloon_feature_map[] = {
 396    FEATURE_ENTRY(VIRTIO_BALLOON_F_MUST_TELL_HOST, \
 397            "VIRTIO_BALLOON_F_MUST_TELL_HOST: Tell host before reclaiming "
 398            "pages"),
 399    FEATURE_ENTRY(VIRTIO_BALLOON_F_STATS_VQ, \
 400            "VIRTIO_BALLOON_F_STATS_VQ: Guest memory stats VQ available"),
 401    FEATURE_ENTRY(VIRTIO_BALLOON_F_DEFLATE_ON_OOM, \
 402            "VIRTIO_BALLOON_F_DEFLATE_ON_OOM: Deflate balloon when guest OOM"),
 403    FEATURE_ENTRY(VIRTIO_BALLOON_F_FREE_PAGE_HINT, \
 404            "VIRTIO_BALLOON_F_FREE_PAGE_HINT: VQ reporting free pages enabled"),
 405    FEATURE_ENTRY(VIRTIO_BALLOON_F_PAGE_POISON, \
 406            "VIRTIO_BALLOON_F_PAGE_POISON: Guest page poisoning enabled"),
 407    FEATURE_ENTRY(VIRTIO_BALLOON_F_REPORTING, \
 408            "VIRTIO_BALLOON_F_REPORTING: Page reporting VQ enabled"),
 409    { -1, "" }
 410};
 411
 412/* virtio-crypto features mapping */
 413qmp_virtio_feature_map_t virtio_crypto_feature_map[] = {
 414    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 415            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 416    { -1, "" }
 417};
 418
 419/* virtio-iommu features mapping */
 420qmp_virtio_feature_map_t virtio_iommu_feature_map[] = {
 421    FEATURE_ENTRY(VIRTIO_IOMMU_F_INPUT_RANGE, \
 422            "VIRTIO_IOMMU_F_INPUT_RANGE: Range of available virtual addrs. "
 423            "available"),
 424    FEATURE_ENTRY(VIRTIO_IOMMU_F_DOMAIN_RANGE, \
 425            "VIRTIO_IOMMU_F_DOMAIN_RANGE: Number of supported domains "
 426            "available"),
 427    FEATURE_ENTRY(VIRTIO_IOMMU_F_MAP_UNMAP, \
 428            "VIRTIO_IOMMU_F_MAP_UNMAP: Map and unmap requests available"),
 429    FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS, \
 430            "VIRTIO_IOMMU_F_BYPASS: Endpoints not attached to domains are in "
 431            "bypass mode"),
 432    FEATURE_ENTRY(VIRTIO_IOMMU_F_PROBE, \
 433            "VIRTIO_IOMMU_F_PROBE: Probe requests available"),
 434    FEATURE_ENTRY(VIRTIO_IOMMU_F_MMIO, \
 435            "VIRTIO_IOMMU_F_MMIO: VIRTIO_IOMMU_MAP_F_MMIO flag available"),
 436    FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS_CONFIG, \
 437            "VIRTIO_IOMMU_F_BYPASS_CONFIG: Bypass field of IOMMU config "
 438            "available"),
 439    { -1, "" }
 440};
 441
 442/* virtio-mem features mapping */
 443qmp_virtio_feature_map_t virtio_mem_feature_map[] = {
 444#ifndef CONFIG_ACPI
 445    FEATURE_ENTRY(VIRTIO_MEM_F_ACPI_PXM, \
 446            "VIRTIO_MEM_F_ACPI_PXM: node_id is an ACPI PXM and is valid"),
 447#endif /* !CONFIG_ACPI */
 448    FEATURE_ENTRY(VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, \
 449            "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: Unplugged memory cannot be "
 450            "accessed"),
 451    { -1, "" }
 452};
 453
 454/* virtio-rng features mapping */
 455qmp_virtio_feature_map_t virtio_rng_feature_map[] = {
 456    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
 457            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
 458    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
 459            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
 460            "negotiation supported"),
 461    { -1, "" }
 462};
 463
 464/*
 465 * The alignment to use between consumer and producer parts of vring.
 466 * x86 pagesize again. This is the default, used by transports like PCI
 467 * which don't provide a means for the guest to tell the host the alignment.
 468 */
 469#define VIRTIO_PCI_VRING_ALIGN         4096
 470
 471typedef struct VRingDesc
 472{
 473    uint64_t addr;
 474    uint32_t len;
 475    uint16_t flags;
 476    uint16_t next;
 477} VRingDesc;
 478
 479typedef struct VRingPackedDesc {
 480    uint64_t addr;
 481    uint32_t len;
 482    uint16_t id;
 483    uint16_t flags;
 484} VRingPackedDesc;
 485
 486typedef struct VRingAvail
 487{
 488    uint16_t flags;
 489    uint16_t idx;
 490    uint16_t ring[];
 491} VRingAvail;
 492
 493typedef struct VRingUsedElem
 494{
 495    uint32_t id;
 496    uint32_t len;
 497} VRingUsedElem;
 498
 499typedef struct VRingUsed
 500{
 501    uint16_t flags;
 502    uint16_t idx;
 503    VRingUsedElem ring[];
 504} VRingUsed;
 505
 506typedef struct VRingMemoryRegionCaches {
 507    struct rcu_head rcu;
 508    MemoryRegionCache desc;
 509    MemoryRegionCache avail;
 510    MemoryRegionCache used;
 511} VRingMemoryRegionCaches;
 512
 513typedef struct VRing
 514{
 515    unsigned int num;
 516    unsigned int num_default;
 517    unsigned int align;
 518    hwaddr desc;
 519    hwaddr avail;
 520    hwaddr used;
 521    VRingMemoryRegionCaches *caches;
 522} VRing;
 523
 524typedef struct VRingPackedDescEvent {
 525    uint16_t off_wrap;
 526    uint16_t flags;
  527} VRingPackedDescEvent;
 528
 529struct VirtQueue
 530{
 531    VRing vring;
 532    VirtQueueElement *used_elems;
 533
 534    /* Next head to pop */
 535    uint16_t last_avail_idx;
 536    bool last_avail_wrap_counter;
 537
 538    /* Last avail_idx read from VQ. */
 539    uint16_t shadow_avail_idx;
 540    bool shadow_avail_wrap_counter;
 541
 542    uint16_t used_idx;
 543    bool used_wrap_counter;
 544
 545    /* Last used index value we have signalled on */
 546    uint16_t signalled_used;
 547
  548    /* Whether the signalled_used value above is still valid */
 549    bool signalled_used_valid;
 550
 551    /* Notification enabled? */
 552    bool notification;
 553
 554    uint16_t queue_index;
 555
 556    unsigned int inuse;
 557
 558    uint16_t vector;
 559    VirtIOHandleOutput handle_output;
 560    VirtIODevice *vdev;
 561    EventNotifier guest_notifier;
 562    EventNotifier host_notifier;
 563    bool host_notifier_enabled;
 564    QLIST_ENTRY(VirtQueue) node;
 565};
 566
 567const char *virtio_device_names[] = {
 568    [VIRTIO_ID_NET] = "virtio-net",
 569    [VIRTIO_ID_BLOCK] = "virtio-blk",
 570    [VIRTIO_ID_CONSOLE] = "virtio-serial",
 571    [VIRTIO_ID_RNG] = "virtio-rng",
 572    [VIRTIO_ID_BALLOON] = "virtio-balloon",
 573    [VIRTIO_ID_IOMEM] = "virtio-iomem",
 574    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
 575    [VIRTIO_ID_SCSI] = "virtio-scsi",
 576    [VIRTIO_ID_9P] = "virtio-9p",
 577    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
 578    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
 579    [VIRTIO_ID_CAIF] = "virtio-caif",
 580    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
 581    [VIRTIO_ID_GPU] = "virtio-gpu",
 582    [VIRTIO_ID_CLOCK] = "virtio-clk",
 583    [VIRTIO_ID_INPUT] = "virtio-input",
 584    [VIRTIO_ID_VSOCK] = "vhost-vsock",
 585    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
 586    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
 587    [VIRTIO_ID_PSTORE] = "virtio-pstore",
 588    [VIRTIO_ID_IOMMU] = "virtio-iommu",
 589    [VIRTIO_ID_MEM] = "virtio-mem",
 590    [VIRTIO_ID_SOUND] = "virtio-sound",
 591    [VIRTIO_ID_FS] = "virtio-user-fs",
 592    [VIRTIO_ID_PMEM] = "virtio-pmem",
 593    [VIRTIO_ID_RPMB] = "virtio-rpmb",
 594    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
 595    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
 596    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
 597    [VIRTIO_ID_SCMI] = "virtio-scmi",
 598    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
 599    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
 600    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
 601    [VIRTIO_ID_CAN] = "virtio-can",
 602    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
 603    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
 604    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
 605    [VIRTIO_ID_BT] = "virtio-bluetooth",
 606    [VIRTIO_ID_GPIO] = "virtio-gpio"
 607};
 608
 609static const char *virtio_id_to_name(uint16_t device_id)
 610{
 611    assert(device_id < G_N_ELEMENTS(virtio_device_names));
 612    const char *name = virtio_device_names[device_id];
 613    assert(name != NULL);
 614    return name;
 615}
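
/*
 * For example, virtio_id_to_name(VIRTIO_ID_NET) returns "virtio-net"; an ID
 * beyond the table or a hole in it trips the assertions above.
 */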
 616
 617/* Called within call_rcu().  */
 618static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 619{
 620    assert(caches != NULL);
 621    address_space_cache_destroy(&caches->desc);
 622    address_space_cache_destroy(&caches->avail);
 623    address_space_cache_destroy(&caches->used);
 624    g_free(caches);
 625}
 626
 627static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 628{
 629    VRingMemoryRegionCaches *caches;
 630
 631    caches = qatomic_read(&vq->vring.caches);
 632    qatomic_rcu_set(&vq->vring.caches, NULL);
 633    if (caches) {
 634        call_rcu(caches, virtio_free_region_cache, rcu);
 635    }
 636}
 637
 638static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 639{
 640    VirtQueue *vq = &vdev->vq[n];
 641    VRingMemoryRegionCaches *old = vq->vring.caches;
 642    VRingMemoryRegionCaches *new = NULL;
 643    hwaddr addr, size;
 644    int64_t len;
 645    bool packed;
  646
 648    addr = vq->vring.desc;
 649    if (!addr) {
 650        goto out_no_cache;
 651    }
 652    new = g_new0(VRingMemoryRegionCaches, 1);
 653    size = virtio_queue_get_desc_size(vdev, n);
  654    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
 656    len = address_space_cache_init(&new->desc, vdev->dma_as,
 657                                   addr, size, packed);
 658    if (len < size) {
 659        virtio_error(vdev, "Cannot map desc");
 660        goto err_desc;
 661    }
 662
 663    size = virtio_queue_get_used_size(vdev, n);
 664    len = address_space_cache_init(&new->used, vdev->dma_as,
 665                                   vq->vring.used, size, true);
 666    if (len < size) {
 667        virtio_error(vdev, "Cannot map used");
 668        goto err_used;
 669    }
 670
 671    size = virtio_queue_get_avail_size(vdev, n);
 672    len = address_space_cache_init(&new->avail, vdev->dma_as,
 673                                   vq->vring.avail, size, false);
 674    if (len < size) {
 675        virtio_error(vdev, "Cannot map avail");
 676        goto err_avail;
 677    }
 678
 679    qatomic_rcu_set(&vq->vring.caches, new);
 680    if (old) {
 681        call_rcu(old, virtio_free_region_cache, rcu);
 682    }
 683    return;
 684
 685err_avail:
 686    address_space_cache_destroy(&new->avail);
 687err_used:
 688    address_space_cache_destroy(&new->used);
 689err_desc:
 690    address_space_cache_destroy(&new->desc);
 691out_no_cache:
 692    g_free(new);
 693    virtio_virtqueue_reset_region_cache(vq);
 694}
 695
 696/* virt queue functions */
 697void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 698{
 699    VRing *vring = &vdev->vq[n].vring;
 700
 701    if (!vring->num || !vring->desc || !vring->align) {
 702        /* not yet setup -> nothing to do */
 703        return;
 704    }
 705    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 706    vring->used = vring_align(vring->avail +
 707                              offsetof(VRingAvail, ring[vring->num]),
 708                              vring->align);
 709    virtio_init_region_cache(vdev, n);
 710}
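
/*
 * Illustrative layout example (not used by the code above): for a split ring
 * with num = 256 and the default VIRTIO_PCI_VRING_ALIGN of 4096, and assuming
 * a page-aligned vring->desc, virtio_queue_update_rings() places the regions
 * as follows:
 *
 *   desc  at vring->desc                      256 * sizeof(VRingDesc) = 4096 bytes
 *   avail at desc + 4096                      4 + 2 * 256 = 516 bytes
 *   used  at vring_align(avail + 516, 4096)   = desc + 8192
 *
 * i.e. with the default PCI alignment the used ring starts on a fresh page,
 * as described in the comment next to VIRTIO_PCI_VRING_ALIGN.
 */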
 711
 712/* Called within rcu_read_lock().  */
 713static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 714                                  MemoryRegionCache *cache, int i)
 715{
 716    address_space_read_cached(cache, i * sizeof(VRingDesc),
 717                              desc, sizeof(VRingDesc));
 718    virtio_tswap64s(vdev, &desc->addr);
 719    virtio_tswap32s(vdev, &desc->len);
 720    virtio_tswap16s(vdev, &desc->flags);
 721    virtio_tswap16s(vdev, &desc->next);
 722}
 723
 724static void vring_packed_event_read(VirtIODevice *vdev,
 725                                    MemoryRegionCache *cache,
 726                                    VRingPackedDescEvent *e)
 727{
 728    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 729    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 730
 731    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
 732    /* Make sure flags is seen before off_wrap */
 733    smp_rmb();
 734    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
 735    virtio_tswap16s(vdev, &e->flags);
 736}
 737
 738static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 739                                        MemoryRegionCache *cache,
 740                                        uint16_t off_wrap)
 741{
 742    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 743
 744    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
 745    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 746}
 747
 748static void vring_packed_flags_write(VirtIODevice *vdev,
 749                                     MemoryRegionCache *cache, uint16_t flags)
 750{
 751    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 752
 753    virtio_stw_phys_cached(vdev, cache, off, flags);
 754    address_space_cache_invalidate(cache, off, sizeof(flags));
 755}
 756
 757/* Called within rcu_read_lock().  */
 758static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 759{
 760    return qatomic_rcu_read(&vq->vring.caches);
 761}
 762
 763/* Called within rcu_read_lock().  */
 764static inline uint16_t vring_avail_flags(VirtQueue *vq)
 765{
 766    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 767    hwaddr pa = offsetof(VRingAvail, flags);
 768
 769    if (!caches) {
 770        return 0;
 771    }
 772
 773    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 774}
 775
 776/* Called within rcu_read_lock().  */
 777static inline uint16_t vring_avail_idx(VirtQueue *vq)
 778{
 779    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 780    hwaddr pa = offsetof(VRingAvail, idx);
 781
 782    if (!caches) {
 783        return 0;
 784    }
 785
 786    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 787    return vq->shadow_avail_idx;
 788}
 789
 790/* Called within rcu_read_lock().  */
 791static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 792{
 793    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 794    hwaddr pa = offsetof(VRingAvail, ring[i]);
 795
 796    if (!caches) {
 797        return 0;
 798    }
 799
 800    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 801}
 802
 803/* Called within rcu_read_lock().  */
 804static inline uint16_t vring_get_used_event(VirtQueue *vq)
 805{
 806    return vring_avail_ring(vq, vq->vring.num);
 807}
 808
 809/* Called within rcu_read_lock().  */
 810static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 811                                    int i)
 812{
 813    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 814    hwaddr pa = offsetof(VRingUsed, ring[i]);
 815
 816    if (!caches) {
 817        return;
 818    }
 819
 820    virtio_tswap32s(vq->vdev, &uelem->id);
 821    virtio_tswap32s(vq->vdev, &uelem->len);
 822    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 823    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 824}
 825
 826/* Called within rcu_read_lock(). */
 827static inline uint16_t vring_used_flags(VirtQueue *vq)
 828{
 829    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 830    hwaddr pa = offsetof(VRingUsed, flags);
 831
 832    if (!caches) {
 833        return 0;
 834    }
 835
 836    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 837}
 838
 839/* Called within rcu_read_lock().  */
 840static uint16_t vring_used_idx(VirtQueue *vq)
 841{
 842    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 843    hwaddr pa = offsetof(VRingUsed, idx);
 844
 845    if (!caches) {
 846        return 0;
 847    }
 848
 849    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 850}
 851
 852/* Called within rcu_read_lock().  */
 853static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 854{
 855    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 856    hwaddr pa = offsetof(VRingUsed, idx);
 857
 858    if (caches) {
 859        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 860        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 861    }
 862
 863    vq->used_idx = val;
 864}
 865
 866/* Called within rcu_read_lock().  */
 867static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 868{
 869    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 870    VirtIODevice *vdev = vq->vdev;
 871    hwaddr pa = offsetof(VRingUsed, flags);
 872    uint16_t flags;
 873
 874    if (!caches) {
 875        return;
 876    }
 877
 878    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 879    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 880    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 881}
 882
 883/* Called within rcu_read_lock().  */
 884static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 885{
 886    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 887    VirtIODevice *vdev = vq->vdev;
 888    hwaddr pa = offsetof(VRingUsed, flags);
 889    uint16_t flags;
 890
 891    if (!caches) {
 892        return;
 893    }
 894
 895    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 896    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 897    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 898}
 899
 900/* Called within rcu_read_lock().  */
 901static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 902{
 903    VRingMemoryRegionCaches *caches;
 904    hwaddr pa;
 905    if (!vq->notification) {
 906        return;
 907    }
 908
 909    caches = vring_get_region_caches(vq);
 910    if (!caches) {
 911        return;
 912    }
 913
 914    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 915    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 916    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 917}
 918
 919static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 920{
 921    RCU_READ_LOCK_GUARD();
 922
 923    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 924        vring_set_avail_event(vq, vring_avail_idx(vq));
 925    } else if (enable) {
 926        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 927    } else {
 928        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 929    }
 930    if (enable) {
 931        /* Expose avail event/used flags before caller checks the avail idx. */
 932        smp_mb();
 933    }
 934}
 935
 936static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 937{
 938    uint16_t off_wrap;
 939    VRingPackedDescEvent e;
 940    VRingMemoryRegionCaches *caches;
 941
 942    RCU_READ_LOCK_GUARD();
 943    caches = vring_get_region_caches(vq);
 944    if (!caches) {
 945        return;
 946    }
 947
 948    vring_packed_event_read(vq->vdev, &caches->used, &e);
 949
 950    if (!enable) {
 951        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 952    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 953        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 954        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  955        /* Make sure off_wrap is written before flags */
 956        smp_wmb();
 957        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 958    } else {
 959        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 960    }
 961
 962    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 963    if (enable) {
 964        /* Expose avail event/used flags before caller checks the avail idx. */
 965        smp_mb();
 966    }
 967}
 968
 969bool virtio_queue_get_notification(VirtQueue *vq)
 970{
 971    return vq->notification;
 972}
 973
 974void virtio_queue_set_notification(VirtQueue *vq, int enable)
 975{
 976    vq->notification = enable;
 977
 978    if (!vq->vring.desc) {
 979        return;
 980    }
 981
 982    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 983        virtio_queue_packed_set_notification(vq, enable);
 984    } else {
 985        virtio_queue_split_set_notification(vq, enable);
 986    }
 987}
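
/*
 * Usage sketch (illustrative, not part of this file): a device's virtqueue
 * handler typically disables guest->host notifications while it drains the
 * ring and re-enables them only for the final emptiness check, e.g.:
 *
 *     VirtQueueElement *elem;
 *
 *     do {
 *         virtio_queue_set_notification(vq, 0);
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             // process the request, then virtqueue_push(), virtio_notify()
 *             g_free(elem);
 *         }
 *         virtio_queue_set_notification(vq, 1);
 *     } while (!virtio_queue_empty(vq));
 *
 * This keeps the number of guest notifications low while the queue is busy.
 */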
 988
 989int virtio_queue_ready(VirtQueue *vq)
 990{
 991    return vq->vring.avail != 0;
 992}
 993
 994static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 995                                         uint16_t *flags,
 996                                         MemoryRegionCache *cache,
 997                                         int i)
 998{
 999    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
1000
1001    *flags = virtio_lduw_phys_cached(vdev, cache, off);
1002}
1003
1004static void vring_packed_desc_read(VirtIODevice *vdev,
1005                                   VRingPackedDesc *desc,
1006                                   MemoryRegionCache *cache,
1007                                   int i, bool strict_order)
1008{
1009    hwaddr off = i * sizeof(VRingPackedDesc);
1010
1011    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
1012
1013    if (strict_order) {
 1014        /* Make sure flags is read before the other fields. */
1015        smp_rmb();
1016    }
1017
1018    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
1019                              &desc->addr, sizeof(desc->addr));
1020    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
1021                              &desc->id, sizeof(desc->id));
1022    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
1023                              &desc->len, sizeof(desc->len));
1024    virtio_tswap64s(vdev, &desc->addr);
1025    virtio_tswap16s(vdev, &desc->id);
1026    virtio_tswap32s(vdev, &desc->len);
1027}
1028
1029static void vring_packed_desc_write_data(VirtIODevice *vdev,
1030                                         VRingPackedDesc *desc,
1031                                         MemoryRegionCache *cache,
1032                                         int i)
1033{
1034    hwaddr off_id = i * sizeof(VRingPackedDesc) +
1035                    offsetof(VRingPackedDesc, id);
1036    hwaddr off_len = i * sizeof(VRingPackedDesc) +
1037                    offsetof(VRingPackedDesc, len);
1038
1039    virtio_tswap32s(vdev, &desc->len);
1040    virtio_tswap16s(vdev, &desc->id);
1041    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
1042    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
1043    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
1044    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
1045}
1046
1047static void vring_packed_desc_write_flags(VirtIODevice *vdev,
1048                                          VRingPackedDesc *desc,
1049                                          MemoryRegionCache *cache,
1050                                          int i)
1051{
1052    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
1053
1054    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
1055    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
1056}
1057
1058static void vring_packed_desc_write(VirtIODevice *vdev,
1059                                    VRingPackedDesc *desc,
1060                                    MemoryRegionCache *cache,
1061                                    int i, bool strict_order)
1062{
1063    vring_packed_desc_write_data(vdev, desc, cache, i);
1064    if (strict_order) {
 1065        /* Make sure data is written before flags. */
1066        smp_wmb();
1067    }
1068    vring_packed_desc_write_flags(vdev, desc, cache, i);
1069}
1070
1071static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
1072{
1073    bool avail, used;
1074
1075    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1076    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1077    return (avail != used) && (avail == wrap_counter);
1078}
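
/*
 * Example (illustrative): the driver marks a packed descriptor available by
 * setting AVAIL to its current wrap counter and USED to the inverse.  With
 * wrap_counter == true, flags with AVAIL=1/USED=0 are therefore available
 * (avail != used and avail == wrap counter); once the device writes the
 * descriptor back with AVAIL=1/USED=1, the test above returns false again.
 */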
1079
1080/* Fetch avail_idx from VQ memory only when we really need to know if
1081 * guest has added some buffers.
1082 * Called within rcu_read_lock().  */
1083static int virtio_queue_empty_rcu(VirtQueue *vq)
1084{
1085    if (virtio_device_disabled(vq->vdev)) {
1086        return 1;
1087    }
1088
1089    if (unlikely(!vq->vring.avail)) {
1090        return 1;
1091    }
1092
1093    if (vq->shadow_avail_idx != vq->last_avail_idx) {
1094        return 0;
1095    }
1096
1097    return vring_avail_idx(vq) == vq->last_avail_idx;
1098}
1099
1100static int virtio_queue_split_empty(VirtQueue *vq)
1101{
1102    bool empty;
1103
1104    if (virtio_device_disabled(vq->vdev)) {
1105        return 1;
1106    }
1107
1108    if (unlikely(!vq->vring.avail)) {
1109        return 1;
1110    }
1111
1112    if (vq->shadow_avail_idx != vq->last_avail_idx) {
1113        return 0;
1114    }
1115
1116    RCU_READ_LOCK_GUARD();
1117    empty = vring_avail_idx(vq) == vq->last_avail_idx;
1118    return empty;
1119}
1120
1121/* Called within rcu_read_lock().  */
1122static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
1123{
1124    struct VRingPackedDesc desc;
1125    VRingMemoryRegionCaches *cache;
1126
1127    if (unlikely(!vq->vring.desc)) {
1128        return 1;
1129    }
1130
1131    cache = vring_get_region_caches(vq);
1132    if (!cache) {
1133        return 1;
1134    }
1135
1136    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
1137                                 vq->last_avail_idx);
1138
1139    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
1140}
1141
1142static int virtio_queue_packed_empty(VirtQueue *vq)
1143{
1144    RCU_READ_LOCK_GUARD();
1145    return virtio_queue_packed_empty_rcu(vq);
1146}
1147
1148int virtio_queue_empty(VirtQueue *vq)
1149{
1150    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1151        return virtio_queue_packed_empty(vq);
1152    } else {
1153        return virtio_queue_split_empty(vq);
1154    }
1155}
1156
1157static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
1158                               unsigned int len)
1159{
1160    AddressSpace *dma_as = vq->vdev->dma_as;
1161    unsigned int offset;
1162    int i;
1163
1164    offset = 0;
1165    for (i = 0; i < elem->in_num; i++) {
1166        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
1167
1168        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
1169                         elem->in_sg[i].iov_len,
1170                         DMA_DIRECTION_FROM_DEVICE, size);
1171
1172        offset += size;
1173    }
1174
 1175    for (i = 0; i < elem->out_num; i++) {
 1176        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
 1177                         elem->out_sg[i].iov_len,
 1178                         DMA_DIRECTION_TO_DEVICE,
 1179                         elem->out_sg[i].iov_len);
    }
1180}
1181
1182/* virtqueue_detach_element:
1183 * @vq: The #VirtQueue
1184 * @elem: The #VirtQueueElement
1185 * @len: number of bytes written
1186 *
1187 * Detach the element from the virtqueue.  This function is suitable for device
1188 * reset or other situations where a #VirtQueueElement is simply freed and will
1189 * not be pushed or discarded.
1190 */
1191void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
1192                              unsigned int len)
1193{
1194    vq->inuse -= elem->ndescs;
1195    virtqueue_unmap_sg(vq, elem, len);
1196}
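
/*
 * Usage sketch (illustrative): a device reset path that still holds popped
 * but unprocessed elements would typically walk its pending-request list
 * (req below is a hypothetical per-device structure embedding the element):
 *
 *     virtqueue_detach_element(req->vq, &req->elem, 0);
 *     g_free(req);
 *
 * balancing the in-use counter without writing anything to the used ring.
 */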
1197
1198static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
1199{
1200    vq->last_avail_idx -= num;
1201}
1202
1203static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
1204{
1205    if (vq->last_avail_idx < num) {
1206        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
1207        vq->last_avail_wrap_counter ^= 1;
1208    } else {
1209        vq->last_avail_idx -= num;
1210    }
1211}
1212
1213/* virtqueue_unpop:
1214 * @vq: The #VirtQueue
1215 * @elem: The #VirtQueueElement
1216 * @len: number of bytes written
1217 *
1218 * Pretend the most recent element wasn't popped from the virtqueue.  The next
1219 * call to virtqueue_pop() will refetch the element.
1220 */
1221void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
1222                     unsigned int len)
1223{
1225    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1226        virtqueue_packed_rewind(vq, 1);
1227    } else {
1228        virtqueue_split_rewind(vq, 1);
1229    }
1230
1231    virtqueue_detach_element(vq, elem, len);
1232}
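
/*
 * Usage sketch (illustrative): a device that pops a request and then finds it
 * cannot be processed yet (e.g. a back-end buffer is temporarily full) can
 * return it with
 *
 *     virtqueue_unpop(vq, elem, 0);
 *     g_free(elem);
 *
 * virtqueue_unpop() does not free the element itself, and the same
 * descriptor chain is handed out again by the next virtqueue_pop().
 */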
1233
1234/* virtqueue_rewind:
1235 * @vq: The #VirtQueue
1236 * @num: Number of elements to push back
1237 *
1238 * Pretend that elements weren't popped from the virtqueue.  The next
1239 * virtqueue_pop() will refetch the oldest element.
1240 *
1241 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
1242 *
1243 * Returns: true on success, false if @num is greater than the number of in use
1244 * elements.
1245 */
1246bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
1247{
1248    if (num > vq->inuse) {
1249        return false;
1250    }
1251
1252    vq->inuse -= num;
1253    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1254        virtqueue_packed_rewind(vq, num);
1255    } else {
1256        virtqueue_split_rewind(vq, num);
1257    }
1258    return true;
1259}
1260
1261static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
1262                    unsigned int len, unsigned int idx)
1263{
1264    VRingUsedElem uelem;
1265
1266    if (unlikely(!vq->vring.used)) {
1267        return;
1268    }
1269
1270    idx = (idx + vq->used_idx) % vq->vring.num;
1271
1272    uelem.id = elem->index;
1273    uelem.len = len;
1274    vring_used_write(vq, &uelem, idx);
1275}
1276
1277static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
1278                                  unsigned int len, unsigned int idx)
1279{
1280    vq->used_elems[idx].index = elem->index;
1281    vq->used_elems[idx].len = len;
1282    vq->used_elems[idx].ndescs = elem->ndescs;
1283}
1284
1285static void virtqueue_packed_fill_desc(VirtQueue *vq,
1286                                       const VirtQueueElement *elem,
1287                                       unsigned int idx,
1288                                       bool strict_order)
1289{
1290    uint16_t head;
1291    VRingMemoryRegionCaches *caches;
1292    VRingPackedDesc desc = {
1293        .id = elem->index,
1294        .len = elem->len,
1295    };
1296    bool wrap_counter = vq->used_wrap_counter;
1297
1298    if (unlikely(!vq->vring.desc)) {
1299        return;
1300    }
1301
1302    head = vq->used_idx + idx;
1303    if (head >= vq->vring.num) {
1304        head -= vq->vring.num;
1305        wrap_counter ^= 1;
1306    }
1307    if (wrap_counter) {
1308        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
1309        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
1310    } else {
1311        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
1312        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
1313    }
1314
1315    caches = vring_get_region_caches(vq);
1316    if (!caches) {
1317        return;
1318    }
1319
1320    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
1321}
1322
1323/* Called within rcu_read_lock().  */
1324void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
1325                    unsigned int len, unsigned int idx)
1326{
1327    trace_virtqueue_fill(vq, elem, len, idx);
1328
1329    virtqueue_unmap_sg(vq, elem, len);
1330
1331    if (virtio_device_disabled(vq->vdev)) {
1332        return;
1333    }
1334
1335    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1336        virtqueue_packed_fill(vq, elem, len, idx);
1337    } else {
1338        virtqueue_split_fill(vq, elem, len, idx);
1339    }
1340}
1341
1342/* Called within rcu_read_lock().  */
1343static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
1344{
1345    uint16_t old, new;
1346
1347    if (unlikely(!vq->vring.used)) {
1348        return;
1349    }
1350
1351    /* Make sure buffer is written before we update index. */
1352    smp_wmb();
1353    trace_virtqueue_flush(vq, count);
1354    old = vq->used_idx;
1355    new = old + count;
1356    vring_used_idx_set(vq, new);
1357    vq->inuse -= count;
 1358    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
 1359        vq->signalled_used_valid = false;
    }
1360}
1361
1362static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
1363{
1364    unsigned int i, ndescs = 0;
1365
1366    if (unlikely(!vq->vring.desc)) {
1367        return;
1368    }
1369
1370    for (i = 1; i < count; i++) {
1371        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
1372        ndescs += vq->used_elems[i].ndescs;
1373    }
1374    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
1375    ndescs += vq->used_elems[0].ndescs;
1376
1377    vq->inuse -= ndescs;
1378    vq->used_idx += ndescs;
1379    if (vq->used_idx >= vq->vring.num) {
1380        vq->used_idx -= vq->vring.num;
1381        vq->used_wrap_counter ^= 1;
1382        vq->signalled_used_valid = false;
1383    }
1384}
1385
1386void virtqueue_flush(VirtQueue *vq, unsigned int count)
1387{
1388    if (virtio_device_disabled(vq->vdev)) {
1389        vq->inuse -= count;
1390        return;
1391    }
1392
1393    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1394        virtqueue_packed_flush(vq, count);
1395    } else {
1396        virtqueue_split_flush(vq, count);
1397    }
1398}
1399
1400void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
1401                    unsigned int len)
1402{
1403    RCU_READ_LOCK_GUARD();
1404    virtqueue_fill(vq, elem, len, 0);
1405    virtqueue_flush(vq, 1);
1406}
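
/*
 * Usage sketch (illustrative): completing a single request normally looks
 * like
 *
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     // fill elem->in_sg with the response; len = number of bytes written
 *     virtqueue_push(vq, elem, len);
 *     virtio_notify(vdev, vq);
 *     g_free(elem);
 *
 * Devices that complete several requests at once can instead call
 * virtqueue_fill() with increasing idx values and finish with a single
 * virtqueue_flush(vq, count), both inside the same RCU critical section.
 */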
1407
1408/* Called within rcu_read_lock().  */
1409static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1410{
1411    uint16_t num_heads = vring_avail_idx(vq) - idx;
1412
1413    /* Check it isn't doing very strange things with descriptor numbers. */
1414    if (num_heads > vq->vring.num) {
1415        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1416                     idx, vq->shadow_avail_idx);
1417        return -EINVAL;
1418    }
1419    /* On success, callers read a descriptor at vq->last_avail_idx.
1420     * Make sure descriptor read does not bypass avail index read. */
1421    if (num_heads) {
1422        smp_rmb();
1423    }
1424
1425    return num_heads;
1426}
1427
1428/* Called within rcu_read_lock().  */
1429static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1430                               unsigned int *head)
1431{
1432    /* Grab the next descriptor number they're advertising, and increment
1433     * the index we've seen. */
1434    *head = vring_avail_ring(vq, idx % vq->vring.num);
1435
1436    /* If their number is silly, that's a fatal mistake. */
1437    if (*head >= vq->vring.num) {
1438        virtio_error(vq->vdev, "Guest says index %u is available", *head);
1439        return false;
1440    }
1441
1442    return true;
1443}
1444
1445enum {
1446    VIRTQUEUE_READ_DESC_ERROR = -1,
1447    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1448    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1449};
1450
1451static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1452                                          MemoryRegionCache *desc_cache,
1453                                          unsigned int max, unsigned int *next)
1454{
1455    /* If this descriptor says it doesn't chain, we're done. */
1456    if (!(desc->flags & VRING_DESC_F_NEXT)) {
1457        return VIRTQUEUE_READ_DESC_DONE;
1458    }
1459
1460    /* Check they're not leading us off end of descriptors. */
1461    *next = desc->next;
1462    /* Make sure compiler knows to grab that: we don't want it changing! */
1463    smp_wmb();
1464
1465    if (*next >= max) {
1466        virtio_error(vdev, "Desc next is %u", *next);
1467        return VIRTQUEUE_READ_DESC_ERROR;
1468    }
1469
1470    vring_split_desc_read(vdev, desc, desc_cache, *next);
1471    return VIRTQUEUE_READ_DESC_MORE;
1472}
1473
1474/* Called within rcu_read_lock().  */
1475static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1476                            unsigned int *in_bytes, unsigned int *out_bytes,
1477                            unsigned max_in_bytes, unsigned max_out_bytes,
1478                            VRingMemoryRegionCaches *caches)
1479{
1480    VirtIODevice *vdev = vq->vdev;
1481    unsigned int idx;
1482    unsigned int total_bufs, in_total, out_total;
1483    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1484    int64_t len = 0;
1485    int rc;
1486
1487    idx = vq->last_avail_idx;
1488    total_bufs = in_total = out_total = 0;
1489
1490    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1491        MemoryRegionCache *desc_cache = &caches->desc;
1492        unsigned int num_bufs;
1493        VRingDesc desc;
1494        unsigned int i;
1495        unsigned int max = vq->vring.num;
1496
1497        num_bufs = total_bufs;
1498
1499        if (!virtqueue_get_head(vq, idx++, &i)) {
1500            goto err;
1501        }
1502
1503        vring_split_desc_read(vdev, &desc, desc_cache, i);
1504
1505        if (desc.flags & VRING_DESC_F_INDIRECT) {
1506            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1507                virtio_error(vdev, "Invalid size for indirect buffer table");
1508                goto err;
1509            }
1510
1511            /* If we've got too many, that implies a descriptor loop. */
1512            if (num_bufs >= max) {
1513                virtio_error(vdev, "Looped descriptor");
1514                goto err;
1515            }
1516
1517            /* loop over the indirect descriptor table */
1518            len = address_space_cache_init(&indirect_desc_cache,
1519                                           vdev->dma_as,
1520                                           desc.addr, desc.len, false);
1521            desc_cache = &indirect_desc_cache;
1522            if (len < desc.len) {
1523                virtio_error(vdev, "Cannot map indirect buffer");
1524                goto err;
1525            }
1526
1527            max = desc.len / sizeof(VRingDesc);
1528            num_bufs = i = 0;
1529            vring_split_desc_read(vdev, &desc, desc_cache, i);
1530        }
1531
1532        do {
1533            /* If we've got too many, that implies a descriptor loop. */
1534            if (++num_bufs > max) {
1535                virtio_error(vdev, "Looped descriptor");
1536                goto err;
1537            }
1538
1539            if (desc.flags & VRING_DESC_F_WRITE) {
1540                in_total += desc.len;
1541            } else {
1542                out_total += desc.len;
1543            }
1544            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1545                goto done;
1546            }
1547
1548            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1549        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1550
1551        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1552            goto err;
1553        }
1554
1555        if (desc_cache == &indirect_desc_cache) {
1556            address_space_cache_destroy(&indirect_desc_cache);
1557            total_bufs++;
1558        } else {
1559            total_bufs = num_bufs;
1560        }
1561    }
1562
1563    if (rc < 0) {
1564        goto err;
1565    }
1566
1567done:
1568    address_space_cache_destroy(&indirect_desc_cache);
1569    if (in_bytes) {
1570        *in_bytes = in_total;
1571    }
1572    if (out_bytes) {
1573        *out_bytes = out_total;
1574    }
1575    return;
1576
1577err:
1578    in_total = out_total = 0;
1579    goto done;
1580}
1581
1582static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1583                                           VRingPackedDesc *desc,
1584                                           MemoryRegionCache
1585                                           *desc_cache,
1586                                           unsigned int max,
1587                                           unsigned int *next,
1588                                           bool indirect)
1589{
1590    /* If this descriptor says it doesn't chain, we're done. */
1591    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1592        return VIRTQUEUE_READ_DESC_DONE;
1593    }
1594
1595    ++*next;
1596    if (*next == max) {
1597        if (indirect) {
1598            return VIRTQUEUE_READ_DESC_DONE;
1599        } else {
1600            (*next) -= vq->vring.num;
1601        }
1602    }
1603
1604    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1605    return VIRTQUEUE_READ_DESC_MORE;
1606}
1607
1608/* Called within rcu_read_lock().  */
1609static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1610                                             unsigned int *in_bytes,
1611                                             unsigned int *out_bytes,
1612                                             unsigned max_in_bytes,
1613                                             unsigned max_out_bytes,
1614                                             VRingMemoryRegionCaches *caches)
1615{
1616    VirtIODevice *vdev = vq->vdev;
1617    unsigned int idx;
1618    unsigned int total_bufs, in_total, out_total;
1619    MemoryRegionCache *desc_cache;
1620    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1621    int64_t len = 0;
1622    VRingPackedDesc desc;
1623    bool wrap_counter;
1624
1625    idx = vq->last_avail_idx;
1626    wrap_counter = vq->last_avail_wrap_counter;
1627    total_bufs = in_total = out_total = 0;
1628
1629    for (;;) {
1630        unsigned int num_bufs = total_bufs;
1631        unsigned int i = idx;
1632        int rc;
1633        unsigned int max = vq->vring.num;
1634
1635        desc_cache = &caches->desc;
1636
1637        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1638        if (!is_desc_avail(desc.flags, wrap_counter)) {
1639            break;
1640        }
1641
1642        if (desc.flags & VRING_DESC_F_INDIRECT) {
1643            if (desc.len % sizeof(VRingPackedDesc)) {
1644                virtio_error(vdev, "Invalid size for indirect buffer table");
1645                goto err;
1646            }
1647
1648            /* If we've got too many, that implies a descriptor loop. */
1649            if (num_bufs >= max) {
1650                virtio_error(vdev, "Looped descriptor");
1651                goto err;
1652            }
1653
1654            /* loop over the indirect descriptor table */
1655            len = address_space_cache_init(&indirect_desc_cache,
1656                                           vdev->dma_as,
1657                                           desc.addr, desc.len, false);
1658            desc_cache = &indirect_desc_cache;
1659            if (len < desc.len) {
1660                virtio_error(vdev, "Cannot map indirect buffer");
1661                goto err;
1662            }
1663
1664            max = desc.len / sizeof(VRingPackedDesc);
1665            num_bufs = i = 0;
1666            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1667        }
1668
1669        do {
1670            /* If we've got too many, that implies a descriptor loop. */
1671            if (++num_bufs > max) {
1672                virtio_error(vdev, "Looped descriptor");
1673                goto err;
1674            }
1675
1676            if (desc.flags & VRING_DESC_F_WRITE) {
1677                in_total += desc.len;
1678            } else {
1679                out_total += desc.len;
1680            }
1681            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1682                goto done;
1683            }
1684
1685            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1686                                                 &i, desc_cache ==
1687                                                 &indirect_desc_cache);
1688        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1689
1690        if (desc_cache == &indirect_desc_cache) {
1691            address_space_cache_destroy(&indirect_desc_cache);
1692            total_bufs++;
1693            idx++;
1694        } else {
1695            idx += num_bufs - total_bufs;
1696            total_bufs = num_bufs;
1697        }
1698
1699        if (idx >= vq->vring.num) {
1700            idx -= vq->vring.num;
1701            wrap_counter ^= 1;
1702        }
1703    }
1704
1705    /* Record the index and wrap counter for a kick we want */
1706    vq->shadow_avail_idx = idx;
1707    vq->shadow_avail_wrap_counter = wrap_counter;
1708done:
1709    address_space_cache_destroy(&indirect_desc_cache);
1710    if (in_bytes) {
1711        *in_bytes = in_total;
1712    }
1713    if (out_bytes) {
1714        *out_bytes = out_total;
1715    }
1716    return;
1717
1718err:
1719    in_total = out_total = 0;
1720    goto done;
1721}
1722
1723void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1724                               unsigned int *out_bytes,
1725                               unsigned max_in_bytes, unsigned max_out_bytes)
1726{
1727    uint16_t desc_size;
1728    VRingMemoryRegionCaches *caches;
1729
1730    RCU_READ_LOCK_GUARD();
1731
1732    if (unlikely(!vq->vring.desc)) {
1733        goto err;
1734    }
1735
1736    caches = vring_get_region_caches(vq);
1737    if (!caches) {
1738        goto err;
1739    }
1740
1741    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1742                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1743    if (caches->desc.len < vq->vring.num * desc_size) {
1744        virtio_error(vq->vdev, "Cannot map descriptor ring");
1745        goto err;
1746    }
1747
1748    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1749        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1750                                         max_in_bytes, max_out_bytes,
1751                                         caches);
1752    } else {
1753        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1754                                        max_in_bytes, max_out_bytes,
1755                                        caches);
1756    }
1757
1758    return;
1759err:
1760    if (in_bytes) {
1761        *in_bytes = 0;
1762    }
1763    if (out_bytes) {
1764        *out_bytes = 0;
1765    }
1766}
1767
1768int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1769                          unsigned int out_bytes)
1770{
1771    unsigned int in_total, out_total;
1772
1773    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1774    return in_bytes <= in_total && out_bytes <= out_total;
1775}
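
/*
 * Illustrative sketch (not part of the original file): a device that wants to
 * write a fixed-size response could use virtqueue_avail_bytes() to check that
 * the guest has queued enough device-writable space before doing any work.
 * The function name and the "no driver-readable bytes needed" policy are
 * assumptions made up for this example.
 */
static G_GNUC_UNUSED bool virtio_example_can_reply(VirtQueue *vq,
                                                   unsigned int len)
{
    /* need "len" device-writable (in) bytes, no driver-readable (out) bytes */
    return virtqueue_avail_bytes(vq, len, 0);
}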
1776
1777static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1778                               hwaddr *addr, struct iovec *iov,
1779                               unsigned int max_num_sg, bool is_write,
1780                               hwaddr pa, size_t sz)
1781{
1782    bool ok = false;
1783    unsigned num_sg = *p_num_sg;
1784    assert(num_sg <= max_num_sg);
1785
1786    if (!sz) {
1787        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1788        goto out;
1789    }
1790
1791    while (sz) {
1792        hwaddr len = sz;
1793
1794        if (num_sg == max_num_sg) {
1795            virtio_error(vdev, "virtio: too many write descriptors in "
1796                               "indirect table");
1797            goto out;
1798        }
1799
1800        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1801                                              is_write ?
1802                                              DMA_DIRECTION_FROM_DEVICE :
1803                                              DMA_DIRECTION_TO_DEVICE,
1804                                              MEMTXATTRS_UNSPECIFIED);
1805        if (!iov[num_sg].iov_base) {
1806            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1807            goto out;
1808        }
1809
1810        iov[num_sg].iov_len = len;
1811        addr[num_sg] = pa;
1812
1813        sz -= len;
1814        pa += len;
1815        num_sg++;
1816    }
1817    ok = true;
1818
1819out:
1820    *p_num_sg = num_sg;
1821    return ok;
1822}
1823
1824/* Only used by error code paths before we have a VirtQueueElement (therefore
1825 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1826 * yet.
1827 */
1828static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1829                                    struct iovec *iov)
1830{
1831    unsigned int i;
1832
1833    for (i = 0; i < out_num + in_num; i++) {
1834        int is_write = i >= out_num;
1835
1836        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1837        iov++;
1838    }
1839}
1840
1841static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1842                                hwaddr *addr, unsigned int num_sg,
1843                                bool is_write)
1844{
1845    unsigned int i;
1846    hwaddr len;
1847
1848    for (i = 0; i < num_sg; i++) {
1849        len = sg[i].iov_len;
1850        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1851                                        addr[i], &len, is_write ?
1852                                        DMA_DIRECTION_FROM_DEVICE :
1853                                        DMA_DIRECTION_TO_DEVICE,
1854                                        MEMTXATTRS_UNSPECIFIED);
1855        if (!sg[i].iov_base) {
1856            error_report("virtio: error trying to map MMIO memory");
1857            exit(1);
1858        }
1859        if (len != sg[i].iov_len) {
1860            error_report("virtio: unexpected memory split");
1861            exit(1);
1862        }
1863    }
1864}
1865
1866void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1867{
1868    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1869    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1870                                                                        false);
1871}
1872
1873static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1874{
1875    VirtQueueElement *elem;
1876    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1877    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1878    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1879    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1880    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1881    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1882
1883    assert(sz >= sizeof(VirtQueueElement));
1884    elem = g_malloc(out_sg_end);
1885    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1886    elem->out_num = out_num;
1887    elem->in_num = in_num;
1888    elem->in_addr = (void *)elem + in_addr_ofs;
1889    elem->out_addr = (void *)elem + out_addr_ofs;
1890    elem->in_sg = (void *)elem + in_sg_ofs;
1891    elem->out_sg = (void *)elem + out_sg_ofs;
1892    return elem;
1893}
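
/*
 * Illustrative sketch (not part of the original file): the "sz" argument above
 * exists so that a device can embed VirtQueueElement as the *first* member of
 * a larger per-request structure and pass sizeof() of that structure to
 * virtqueue_pop()/qemu_get_virtqueue_element(); the element, its trailing
 * addr/sg arrays and the device-private fields then share one allocation.
 * "VirtIOExampleReq" and its fields are invented for the example.
 */
typedef struct VirtIOExampleReq {
    VirtQueueElement elem;   /* must stay first: virtqueue_alloc_element()
                              * places the addr/sg arrays right after "sz" */
    uint32_t status;         /* device-private bookkeeping */
} VirtIOExampleReq;

/* e.g.: VirtIOExampleReq *req = virtqueue_pop(vq, sizeof(VirtIOExampleReq)); */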
1894
1895static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1896{
1897    unsigned int i, head, max;
1898    VRingMemoryRegionCaches *caches;
1899    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1900    MemoryRegionCache *desc_cache;
1901    int64_t len;
1902    VirtIODevice *vdev = vq->vdev;
1903    VirtQueueElement *elem = NULL;
1904    unsigned out_num, in_num, elem_entries;
1905    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1906    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1907    VRingDesc desc;
1908    int rc;
1909
1910    RCU_READ_LOCK_GUARD();
1911    if (virtio_queue_empty_rcu(vq)) {
1912        goto done;
1913    }
1914    /* Needed after virtio_queue_empty(), see comment in
1915     * virtqueue_num_heads(). */
1916    smp_rmb();
1917
1918    /* When we start there are neither input nor output buffers. */
1919    out_num = in_num = elem_entries = 0;
1920
1921    max = vq->vring.num;
1922
1923    if (vq->inuse >= vq->vring.num) {
1924        virtio_error(vdev, "Virtqueue size exceeded");
1925        goto done;
1926    }
1927
1928    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1929        goto done;
1930    }
1931
1932    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1933        vring_set_avail_event(vq, vq->last_avail_idx);
1934    }
1935
1936    i = head;
1937
1938    caches = vring_get_region_caches(vq);
1939    if (!caches) {
1940        virtio_error(vdev, "Region caches not initialized");
1941        goto done;
1942    }
1943
1944    if (caches->desc.len < max * sizeof(VRingDesc)) {
1945        virtio_error(vdev, "Cannot map descriptor ring");
1946        goto done;
1947    }
1948
1949    desc_cache = &caches->desc;
1950    vring_split_desc_read(vdev, &desc, desc_cache, i);
1951    if (desc.flags & VRING_DESC_F_INDIRECT) {
1952        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1953            virtio_error(vdev, "Invalid size for indirect buffer table");
1954            goto done;
1955        }
1956
1957        /* loop over the indirect descriptor table */
1958        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1959                                       desc.addr, desc.len, false);
1960        desc_cache = &indirect_desc_cache;
1961        if (len < desc.len) {
1962            virtio_error(vdev, "Cannot map indirect buffer");
1963            goto done;
1964        }
1965
1966        max = desc.len / sizeof(VRingDesc);
1967        i = 0;
1968        vring_split_desc_read(vdev, &desc, desc_cache, i);
1969    }
1970
1971    /* Collect all the descriptors */
1972    do {
1973        bool map_ok;
1974
1975        if (desc.flags & VRING_DESC_F_WRITE) {
1976            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1977                                        iov + out_num,
1978                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1979                                        desc.addr, desc.len);
1980        } else {
1981            if (in_num) {
1982                virtio_error(vdev, "Incorrect order for descriptors");
1983                goto err_undo_map;
1984            }
1985            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1986                                        VIRTQUEUE_MAX_SIZE, false,
1987                                        desc.addr, desc.len);
1988        }
1989        if (!map_ok) {
1990            goto err_undo_map;
1991        }
1992
1993        /* If we've got too many, that implies a descriptor loop. */
1994        if (++elem_entries > max) {
1995            virtio_error(vdev, "Looped descriptor");
1996            goto err_undo_map;
1997        }
1998
1999        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
2000    } while (rc == VIRTQUEUE_READ_DESC_MORE);
2001
2002    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
2003        goto err_undo_map;
2004    }
2005
2006    /* Now copy what we have collected and mapped */
2007    elem = virtqueue_alloc_element(sz, out_num, in_num);
2008    elem->index = head;
2009    elem->ndescs = 1;
2010    for (i = 0; i < out_num; i++) {
2011        elem->out_addr[i] = addr[i];
2012        elem->out_sg[i] = iov[i];
2013    }
2014    for (i = 0; i < in_num; i++) {
2015        elem->in_addr[i] = addr[out_num + i];
2016        elem->in_sg[i] = iov[out_num + i];
2017    }
2018
2019    vq->inuse++;
2020
2021    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
2022done:
2023    address_space_cache_destroy(&indirect_desc_cache);
2024
2025    return elem;
2026
2027err_undo_map:
2028    virtqueue_undo_map_desc(out_num, in_num, iov);
2029    goto done;
2030}
2031
2032static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
2033{
2034    unsigned int i, max;
2035    VRingMemoryRegionCaches *caches;
2036    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
2037    MemoryRegionCache *desc_cache;
2038    int64_t len;
2039    VirtIODevice *vdev = vq->vdev;
2040    VirtQueueElement *elem = NULL;
2041    unsigned out_num, in_num, elem_entries;
2042    hwaddr addr[VIRTQUEUE_MAX_SIZE];
2043    struct iovec iov[VIRTQUEUE_MAX_SIZE];
2044    VRingPackedDesc desc;
2045    uint16_t id;
2046    int rc;
2047
2048    RCU_READ_LOCK_GUARD();
2049    if (virtio_queue_packed_empty_rcu(vq)) {
2050        goto done;
2051    }
2052
2053    /* When we start there are neither input nor output buffers. */
2054    out_num = in_num = elem_entries = 0;
2055
2056    max = vq->vring.num;
2057
2058    if (vq->inuse >= vq->vring.num) {
2059        virtio_error(vdev, "Virtqueue size exceeded");
2060        goto done;
2061    }
2062
2063    i = vq->last_avail_idx;
2064
2065    caches = vring_get_region_caches(vq);
2066    if (!caches) {
2067        virtio_error(vdev, "Region caches not initialized");
2068        goto done;
2069    }
2070
2071    if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
2072        virtio_error(vdev, "Cannot map descriptor ring");
2073        goto done;
2074    }
2075
2076    desc_cache = &caches->desc;
2077    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
2078    id = desc.id;
2079    if (desc.flags & VRING_DESC_F_INDIRECT) {
2080        if (desc.len % sizeof(VRingPackedDesc)) {
2081            virtio_error(vdev, "Invalid size for indirect buffer table");
2082            goto done;
2083        }
2084
2085        /* loop over the indirect descriptor table */
2086        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
2087                                       desc.addr, desc.len, false);
2088        desc_cache = &indirect_desc_cache;
2089        if (len < desc.len) {
2090            virtio_error(vdev, "Cannot map indirect buffer");
2091            goto done;
2092        }
2093
2094        max = desc.len / sizeof(VRingPackedDesc);
2095        i = 0;
2096        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
2097    }
2098
2099    /* Collect all the descriptors */
2100    do {
2101        bool map_ok;
2102
2103        if (desc.flags & VRING_DESC_F_WRITE) {
2104            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
2105                                        iov + out_num,
2106                                        VIRTQUEUE_MAX_SIZE - out_num, true,
2107                                        desc.addr, desc.len);
2108        } else {
2109            if (in_num) {
2110                virtio_error(vdev, "Incorrect order for descriptors");
2111                goto err_undo_map;
2112            }
2113            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
2114                                        VIRTQUEUE_MAX_SIZE, false,
2115                                        desc.addr, desc.len);
2116        }
2117        if (!map_ok) {
2118            goto err_undo_map;
2119        }
2120
2121        /* If we've got too many, that implies a descriptor loop. */
2122        if (++elem_entries > max) {
2123            virtio_error(vdev, "Looped descriptor");
2124            goto err_undo_map;
2125        }
2126
2127        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
2128                                             desc_cache ==
2129                                             &indirect_desc_cache);
2130    } while (rc == VIRTQUEUE_READ_DESC_MORE);
2131
2132    /* Now copy what we have collected and mapped */
2133    elem = virtqueue_alloc_element(sz, out_num, in_num);
2134    for (i = 0; i < out_num; i++) {
2135        elem->out_addr[i] = addr[i];
2136        elem->out_sg[i] = iov[i];
2137    }
2138    for (i = 0; i < in_num; i++) {
2139        elem->in_addr[i] = addr[out_num + i];
2140        elem->in_sg[i] = iov[out_num + i];
2141    }
2142
2143    elem->index = id;
2144    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
2145    vq->last_avail_idx += elem->ndescs;
2146    vq->inuse += elem->ndescs;
2147
2148    if (vq->last_avail_idx >= vq->vring.num) {
2149        vq->last_avail_idx -= vq->vring.num;
2150        vq->last_avail_wrap_counter ^= 1;
2151    }
2152
2153    vq->shadow_avail_idx = vq->last_avail_idx;
2154    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
2155
2156    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
2157done:
2158    address_space_cache_destroy(&indirect_desc_cache);
2159
2160    return elem;
2161
2162err_undo_map:
2163    virtqueue_undo_map_desc(out_num, in_num, iov);
2164    goto done;
2165}
2166
2167void *virtqueue_pop(VirtQueue *vq, size_t sz)
2168{
2169    if (virtio_device_disabled(vq->vdev)) {
2170        return NULL;
2171    }
2172
2173    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
2174        return virtqueue_packed_pop(vq, sz);
2175    } else {
2176        return virtqueue_split_pop(vq, sz);
2177    }
2178}
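
/*
 * Illustrative sketch (not part of the original file): the typical device-side
 * consumer of virtqueue_pop().  The "echo" behaviour (copy the driver-readable
 * request into the device-writable buffers) is invented for the example, and
 * the iov helpers assume "qemu/iov.h"; the pop / push / g_free / notify
 * sequence is the generic pattern.
 */
static G_GNUC_UNUSED void virtio_example_handle_output(VirtIODevice *vdev,
                                                       VirtQueue *vq)
{
    VirtQueueElement *elem;
    uint8_t buf[64];

    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
        size_t n = MIN(sizeof(buf),
                       MIN(iov_size(elem->out_sg, elem->out_num),
                           iov_size(elem->in_sg, elem->in_num)));

        /* read the request (out_sg) and write the response (in_sg) */
        iov_to_buf(elem->out_sg, elem->out_num, 0, buf, n);
        iov_from_buf(elem->in_sg, elem->in_num, 0, buf, n);

        /* report how many bytes the device wrote into the in buffers */
        virtqueue_push(vq, elem, n);
        g_free(elem);
    }

    virtio_notify(vdev, vq);
}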
2179
2180static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
2181{
2182    VRingMemoryRegionCaches *caches;
2183    MemoryRegionCache *desc_cache;
2184    unsigned int dropped = 0;
2185    VirtQueueElement elem = {};
2186    VirtIODevice *vdev = vq->vdev;
2187    VRingPackedDesc desc;
2188
2189    RCU_READ_LOCK_GUARD();
2190
2191    caches = vring_get_region_caches(vq);
2192    if (!caches) {
2193        return 0;
2194    }
2195
2196    desc_cache = &caches->desc;
2197
2198    virtio_queue_set_notification(vq, 0);
2199
2200    while (vq->inuse < vq->vring.num) {
2201        unsigned int idx = vq->last_avail_idx;
2202        /*
2203         * Works similarly to virtqueue_pop() but does not map buffers
2204         * and does not allocate any memory.
2205         */
2206        vring_packed_desc_read(vdev, &desc, desc_cache,
2207                               vq->last_avail_idx, true);
2208        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
2209            break;
2210        }
2211        elem.index = desc.id;
2212        elem.ndescs = 1;
2213        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
2214                                               vq->vring.num, &idx, false)) {
2215            ++elem.ndescs;
2216        }
2217        /*
2218         * immediately push the element, nothing to unmap
2219         * as both in_num and out_num are set to 0.
2220         */
2221        virtqueue_push(vq, &elem, 0);
2222        dropped++;
2223        vq->last_avail_idx += elem.ndescs;
2224        if (vq->last_avail_idx >= vq->vring.num) {
2225            vq->last_avail_idx -= vq->vring.num;
2226            vq->last_avail_wrap_counter ^= 1;
2227        }
2228    }
2229
2230    return dropped;
2231}
2232
2233static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
2234{
2235    unsigned int dropped = 0;
2236    VirtQueueElement elem = {};
2237    VirtIODevice *vdev = vq->vdev;
2238    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2239
2240    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
2241        /* Works similarly to virtqueue_pop() but does not map buffers
2242         * and does not allocate any memory. */
2243        smp_rmb();
2244        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
2245            break;
2246        }
2247        vq->inuse++;
2248        vq->last_avail_idx++;
2249        if (fEventIdx) {
2250            vring_set_avail_event(vq, vq->last_avail_idx);
2251        }
2252        /* immediately push the element, nothing to unmap
2253         * as both in_num and out_num are set to 0 */
2254        virtqueue_push(vq, &elem, 0);
2255        dropped++;
2256    }
2257
2258    return dropped;
2259}
2260
2261/* virtqueue_drop_all:
2262 * @vq: The #VirtQueue
2263 * Drops all queued buffers and indicates them to the guest
2264 * as if they are done. Useful when buffers can not be
2265 * processed but must be returned to the guest.
2266 */
2267unsigned int virtqueue_drop_all(VirtQueue *vq)
2268{
2269    struct VirtIODevice *vdev = vq->vdev;
2270
2271    if (virtio_device_disabled(vq->vdev)) {
2272        return 0;
2273    }
2274
2275    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2276        return virtqueue_packed_drop_all(vq);
2277    } else {
2278        return virtqueue_split_drop_all(vq);
2279    }
2280}
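
/*
 * Illustrative sketch (not part of the original file): a device whose backend
 * is gone could return everything the guest has queued instead of processing
 * it, notifying the guest if anything was dropped.  The function name and the
 * "flush" policy are assumptions for the example.
 */
static G_GNUC_UNUSED void virtio_example_flush_queue(VirtIODevice *vdev,
                                                     VirtQueue *vq)
{
    if (virtqueue_drop_all(vq)) {
        virtio_notify(vdev, vq);
    }
}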
2281
2282/* Reading and writing a structure directly to QEMUFile is *awful*, but
2283 * it is what QEMU has always done by mistake.  We can change it sooner
2284 * or later by bumping the version number of the affected vm states.
2285 * In the meanwhile, since the in-memory layout of VirtQueueElement
2286 * has changed, we need to marshal to and from the layout that was
2287 * used before the change.
2288 */
2289typedef struct VirtQueueElementOld {
2290    unsigned int index;
2291    unsigned int out_num;
2292    unsigned int in_num;
2293    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
2294    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
2295    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
2296    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
2297} VirtQueueElementOld;
2298
2299void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
2300{
2301    VirtQueueElement *elem;
2302    VirtQueueElementOld data;
2303    int i;
2304
2305    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2306
2307    /* TODO: teach all callers that this can fail, and return failure instead
2308     * of asserting here.
2309     * This is just one thing (there are probably more) that must be
2310     * fixed before we can allow NDEBUG compilation.
2311     */
2312    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
2313    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
2314
2315    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
2316    elem->index = data.index;
2317
2318    for (i = 0; i < elem->in_num; i++) {
2319        elem->in_addr[i] = data.in_addr[i];
2320    }
2321
2322    for (i = 0; i < elem->out_num; i++) {
2323        elem->out_addr[i] = data.out_addr[i];
2324    }
2325
2326    for (i = 0; i < elem->in_num; i++) {
2327        /* Base is overwritten by virtqueue_map.  */
2328        elem->in_sg[i].iov_base = 0;
2329        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
2330    }
2331
2332    for (i = 0; i < elem->out_num; i++) {
2333        /* Base is overwritten by virtqueue_map.  */
2334        elem->out_sg[i].iov_base = 0;
2335        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
2336    }
2337
2338    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2339        qemu_get_be32s(f, &elem->ndescs);
2340    }
2341
2342    virtqueue_map(vdev, elem);
2343    return elem;
2344}
2345
2346void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
2347                                VirtQueueElement *elem)
2348{
2349    VirtQueueElementOld data;
2350    int i;
2351
2352    memset(&data, 0, sizeof(data));
2353    data.index = elem->index;
2354    data.in_num = elem->in_num;
2355    data.out_num = elem->out_num;
2356
2357    for (i = 0; i < elem->in_num; i++) {
2358        data.in_addr[i] = elem->in_addr[i];
2359    }
2360
2361    for (i = 0; i < elem->out_num; i++) {
2362        data.out_addr[i] = elem->out_addr[i];
2363    }
2364
2365    for (i = 0; i < elem->in_num; i++) {
2366        /* Base is overwritten by virtqueue_map when loading.  Do not
2367         * save it, as it would leak the QEMU address space layout.  */
2368        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
2369    }
2370
2371    for (i = 0; i < elem->out_num; i++) {
2372        /* Do not save iov_base as above.  */
2373        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
2374    }
2375
2376    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2377        qemu_put_be32s(f, &elem->ndescs);
2378    }
2379
2380    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2381}
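
/*
 * Illustrative sketch (not part of the original file): a device migrating an
 * in-flight request pairs the two helpers above in its save/load code, e.g.
 *
 *     qemu_put_virtqueue_element(vdev, f, &req->elem);            (save)
 *     req = qemu_get_virtqueue_element(vdev, f, sizeof(*req));    (load)
 *
 * where "req" embeds VirtQueueElement as its first member (see the sketch
 * after virtqueue_alloc_element()).  On load the iov_base pointers are
 * re-established by virtqueue_map(), which qemu_get_virtqueue_element()
 * already calls, so only guest addresses and lengths travel in the stream.
 */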
2382
2383/* virtio device */
2384static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
2385{
2386    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2387    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2388
2389    if (virtio_device_disabled(vdev)) {
2390        return;
2391    }
2392
2393    if (k->notify) {
2394        k->notify(qbus->parent, vector);
2395    }
2396}
2397
2398void virtio_update_irq(VirtIODevice *vdev)
2399{
2400    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2401}
2402
2403static int virtio_validate_features(VirtIODevice *vdev)
2404{
2405    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2406
2407    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2408        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2409        return -EFAULT;
2410    }
2411
2412    if (k->validate_features) {
2413        return k->validate_features(vdev);
2414    } else {
2415        return 0;
2416    }
2417}
2418
2419int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2420{
2421    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2422    trace_virtio_set_status(vdev, val);
2423
2424    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2425        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2426            val & VIRTIO_CONFIG_S_FEATURES_OK) {
2427            int ret = virtio_validate_features(vdev);
2428
2429            if (ret) {
2430                return ret;
2431            }
2432        }
2433    }
2434
2435    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2436        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2437        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2438    }
2439
2440    if (k->set_status) {
2441        k->set_status(vdev, val);
2442    }
2443    vdev->status = val;
2444
2445    return 0;
2446}
2447
2448static enum virtio_device_endian virtio_default_endian(void)
2449{
2450    if (target_words_bigendian()) {
2451        return VIRTIO_DEVICE_ENDIAN_BIG;
2452    } else {
2453        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2454    }
2455}
2456
2457static enum virtio_device_endian virtio_current_cpu_endian(void)
2458{
2459    if (cpu_virtio_is_big_endian(current_cpu)) {
2460        return VIRTIO_DEVICE_ENDIAN_BIG;
2461    } else {
2462        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2463    }
2464}
2465
2466static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2467{
2468    vdev->vq[i].vring.desc = 0;
2469    vdev->vq[i].vring.avail = 0;
2470    vdev->vq[i].vring.used = 0;
2471    vdev->vq[i].last_avail_idx = 0;
2472    vdev->vq[i].shadow_avail_idx = 0;
2473    vdev->vq[i].used_idx = 0;
2474    vdev->vq[i].last_avail_wrap_counter = true;
2475    vdev->vq[i].shadow_avail_wrap_counter = true;
2476    vdev->vq[i].used_wrap_counter = true;
2477    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2478    vdev->vq[i].signalled_used = 0;
2479    vdev->vq[i].signalled_used_valid = false;
2480    vdev->vq[i].notification = true;
2481    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2482    vdev->vq[i].inuse = 0;
2483    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2484}
2485
2486void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2487{
2488    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2489
2490    if (k->queue_reset) {
2491        k->queue_reset(vdev, queue_index);
2492    }
2493
2494    __virtio_queue_reset(vdev, queue_index);
2495}
2496
2497void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2498{
2499    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2500
2501    /*
2502     * TODO: Seabios is currently out of spec and triggering this error.
2503     * So this needs to be fixed in Seabios, then this can
2504     * be re-enabled for new machine types only, and also after
2505     * being converted to LOG_GUEST_ERROR.
2506     *
2507    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2508        error_report("queue_enable is only supported in devices of virtio "
2509                     "1.0 or later.");
2510    }
2511    */
2512
2513    if (k->queue_enable) {
2514        k->queue_enable(vdev, queue_index);
2515    }
2516}
2517
2518void virtio_reset(void *opaque)
2519{
2520    VirtIODevice *vdev = opaque;
2521    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2522    int i;
2523
2524    virtio_set_status(vdev, 0);
2525    if (current_cpu) {
2526        /* Guest initiated reset */
2527        vdev->device_endian = virtio_current_cpu_endian();
2528    } else {
2529        /* System reset */
2530        vdev->device_endian = virtio_default_endian();
2531    }
2532
2533    if (k->reset) {
2534        k->reset(vdev);
2535    }
2536
2537    vdev->start_on_kick = false;
2538    vdev->started = false;
2539    vdev->broken = false;
2540    vdev->guest_features = 0;
2541    vdev->queue_sel = 0;
2542    vdev->status = 0;
2543    vdev->disabled = false;
2544    qatomic_set(&vdev->isr, 0);
2545    vdev->config_vector = VIRTIO_NO_VECTOR;
2546    virtio_notify_vector(vdev, vdev->config_vector);
2547
2548    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2549        __virtio_queue_reset(vdev, i);
2550    }
2551}
2552
2553uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2554{
2555    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2556    uint8_t val;
2557
2558    if (addr + sizeof(val) > vdev->config_len) {
2559        return (uint32_t)-1;
2560    }
2561
2562    k->get_config(vdev, vdev->config);
2563
2564    val = ldub_p(vdev->config + addr);
2565    return val;
2566}
2567
2568uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2569{
2570    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2571    uint16_t val;
2572
2573    if (addr + sizeof(val) > vdev->config_len) {
2574        return (uint32_t)-1;
2575    }
2576
2577    k->get_config(vdev, vdev->config);
2578
2579    val = lduw_p(vdev->config + addr);
2580    return val;
2581}
2582
2583uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2584{
2585    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2586    uint32_t val;
2587
2588    if (addr + sizeof(val) > vdev->config_len) {
2589        return (uint32_t)-1;
2590    }
2591
2592    k->get_config(vdev, vdev->config);
2593
2594    val = ldl_p(vdev->config + addr);
2595    return val;
2596}
2597
2598void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2599{
2600    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2601    uint8_t val = data;
2602
2603    if (addr + sizeof(val) > vdev->config_len) {
2604        return;
2605    }
2606
2607    stb_p(vdev->config + addr, val);
2608
2609    if (k->set_config) {
2610        k->set_config(vdev, vdev->config);
2611    }
2612}
2613
2614void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2615{
2616    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2617    uint16_t val = data;
2618
2619    if (addr + sizeof(val) > vdev->config_len) {
2620        return;
2621    }
2622
2623    stw_p(vdev->config + addr, val);
2624
2625    if (k->set_config) {
2626        k->set_config(vdev, vdev->config);
2627    }
2628}
2629
2630void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2631{
2632    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2633    uint32_t val = data;
2634
2635    if (addr + sizeof(val) > vdev->config_len) {
2636        return;
2637    }
2638
2639    stl_p(vdev->config + addr, val);
2640
2641    if (k->set_config) {
2642        k->set_config(vdev, vdev->config);
2643    }
2644}
2645
2646uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2647{
2648    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2649    uint8_t val;
2650
2651    if (addr + sizeof(val) > vdev->config_len) {
2652        return (uint32_t)-1;
2653    }
2654
2655    k->get_config(vdev, vdev->config);
2656
2657    val = ldub_p(vdev->config + addr);
2658    return val;
2659}
2660
2661uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2662{
2663    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2664    uint16_t val;
2665
2666    if (addr + sizeof(val) > vdev->config_len) {
2667        return (uint32_t)-1;
2668    }
2669
2670    k->get_config(vdev, vdev->config);
2671
2672    val = lduw_le_p(vdev->config + addr);
2673    return val;
2674}
2675
2676uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2677{
2678    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2679    uint32_t val;
2680
2681    if (addr + sizeof(val) > vdev->config_len) {
2682        return (uint32_t)-1;
2683    }
2684
2685    k->get_config(vdev, vdev->config);
2686
2687    val = ldl_le_p(vdev->config + addr);
2688    return val;
2689}
2690
2691void virtio_config_modern_writeb(VirtIODevice *vdev,
2692                                 uint32_t addr, uint32_t data)
2693{
2694    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2695    uint8_t val = data;
2696
2697    if (addr + sizeof(val) > vdev->config_len) {
2698        return;
2699    }
2700
2701    stb_p(vdev->config + addr, val);
2702
2703    if (k->set_config) {
2704        k->set_config(vdev, vdev->config);
2705    }
2706}
2707
2708void virtio_config_modern_writew(VirtIODevice *vdev,
2709                                 uint32_t addr, uint32_t data)
2710{
2711    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2712    uint16_t val = data;
2713
2714    if (addr + sizeof(val) > vdev->config_len) {
2715        return;
2716    }
2717
2718    stw_le_p(vdev->config + addr, val);
2719
2720    if (k->set_config) {
2721        k->set_config(vdev, vdev->config);
2722    }
2723}
2724
2725void virtio_config_modern_writel(VirtIODevice *vdev,
2726                                 uint32_t addr, uint32_t data)
2727{
2728    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2729    uint32_t val = data;
2730
2731    if (addr + sizeof(val) > vdev->config_len) {
2732        return;
2733    }
2734
2735    stl_le_p(vdev->config + addr, val);
2736
2737    if (k->set_config) {
2738        k->set_config(vdev, vdev->config);
2739    }
2740}
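
/*
 * Illustrative sketch (not part of the original file): the accessors above
 * read and write a device's config space, which the device fills from its
 * get_config callback.  "VirtIOExampleConfig", its single field and the
 * little-endian encoding (appropriate for a VIRTIO_F_VERSION_1 device) are
 * assumptions for the example.
 */
static G_GNUC_UNUSED void virtio_example_get_config(VirtIODevice *vdev,
                                                    uint8_t *config)
{
    struct VirtIOExampleConfig {
        uint32_t max_segments;
    } cfg = { .max_segments = cpu_to_le32(128) };

    memcpy(config, &cfg, MIN(vdev->config_len, sizeof(cfg)));
}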
2741
2742void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2743{
2744    if (!vdev->vq[n].vring.num) {
2745        return;
2746    }
2747    vdev->vq[n].vring.desc = addr;
2748    virtio_queue_update_rings(vdev, n);
2749}
2750
2751hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2752{
2753    return vdev->vq[n].vring.desc;
2754}
2755
2756void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2757                            hwaddr avail, hwaddr used)
2758{
2759    if (!vdev->vq[n].vring.num) {
2760        return;
2761    }
2762    vdev->vq[n].vring.desc = desc;
2763    vdev->vq[n].vring.avail = avail;
2764    vdev->vq[n].vring.used = used;
2765    virtio_init_region_cache(vdev, n);
2766}
2767
2768void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2769{
2770    /* Don't allow guest to flip queue between existent and
2771     * nonexistent states, or to set it to an invalid size.
2772     */
2773    if (!!num != !!vdev->vq[n].vring.num ||
2774        num > VIRTQUEUE_MAX_SIZE ||
2775        num < 0) {
2776        return;
2777    }
2778    vdev->vq[n].vring.num = num;
2779}
2780
2781VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2782{
2783    return QLIST_FIRST(&vdev->vector_queues[vector]);
2784}
2785
2786VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2787{
2788    return QLIST_NEXT(vq, node);
2789}
2790
2791int virtio_queue_get_num(VirtIODevice *vdev, int n)
2792{
2793    return vdev->vq[n].vring.num;
2794}
2795
2796int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2797{
2798    return vdev->vq[n].vring.num_default;
2799}
2800
2801int virtio_get_num_queues(VirtIODevice *vdev)
2802{
2803    int i;
2804
2805    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2806        if (!virtio_queue_get_num(vdev, i)) {
2807            break;
2808        }
2809    }
2810
2811    return i;
2812}
2813
2814void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2815{
2816    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2817    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2818
2819    /* virtio-1 compliant devices cannot change the alignment */
2820    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2821        error_report("tried to modify queue alignment for virtio-1 device");
2822        return;
2823    }
2824    /* Check that the transport told us it was going to do this
2825     * (so a buggy transport will immediately assert rather than
2826     * silently failing to migrate this state)
2827     */
2828    assert(k->has_variable_vring_alignment);
2829
2830    if (align) {
2831        vdev->vq[n].vring.align = align;
2832        virtio_queue_update_rings(vdev, n);
2833    }
2834}
2835
2836static void virtio_queue_notify_vq(VirtQueue *vq)
2837{
2838    if (vq->vring.desc && vq->handle_output) {
2839        VirtIODevice *vdev = vq->vdev;
2840
2841        if (unlikely(vdev->broken)) {
2842            return;
2843        }
2844
2845        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2846        vq->handle_output(vdev, vq);
2847
2848        if (unlikely(vdev->start_on_kick)) {
2849            virtio_set_started(vdev, true);
2850        }
2851    }
2852}
2853
2854void virtio_queue_notify(VirtIODevice *vdev, int n)
2855{
2856    VirtQueue *vq = &vdev->vq[n];
2857
2858    if (unlikely(!vq->vring.desc || vdev->broken)) {
2859        return;
2860    }
2861
2862    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2863    if (vq->host_notifier_enabled) {
2864        event_notifier_set(&vq->host_notifier);
2865    } else if (vq->handle_output) {
2866        vq->handle_output(vdev, vq);
2867
2868        if (unlikely(vdev->start_on_kick)) {
2869            virtio_set_started(vdev, true);
2870        }
2871    }
2872}
2873
2874uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2875{
2876    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2877        VIRTIO_NO_VECTOR;
2878}
2879
2880void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2881{
2882    VirtQueue *vq = &vdev->vq[n];
2883
2884    if (n < VIRTIO_QUEUE_MAX) {
2885        if (vdev->vector_queues &&
2886            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2887            QLIST_REMOVE(vq, node);
2888        }
2889        vdev->vq[n].vector = vector;
2890        if (vdev->vector_queues &&
2891            vector != VIRTIO_NO_VECTOR) {
2892            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2893        }
2894    }
2895}
2896
2897VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2898                            VirtIOHandleOutput handle_output)
2899{
2900    int i;
2901
2902    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2903        if (vdev->vq[i].vring.num == 0) {
2904            break;
        }
2905    }
2906
2907    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
2908        abort();
    }
2909
2910    vdev->vq[i].vring.num = queue_size;
2911    vdev->vq[i].vring.num_default = queue_size;
2912    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2913    vdev->vq[i].handle_output = handle_output;
2914    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2915
2916    return &vdev->vq[i];
2917}
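
/*
 * Illustrative sketch (not part of the original file): queues are normally
 * created from a device's realize callback and torn down again at unrealize.
 * The queue size of 128 is arbitrary and "handler" stands in for the device's
 * own VirtIOHandleOutput function.
 */
static G_GNUC_UNUSED VirtQueue *
virtio_example_setup_queue(VirtIODevice *vdev, VirtIOHandleOutput handler)
{
    /* paired with virtio_delete_queue()/virtio_del_queue() at unrealize */
    return virtio_add_queue(vdev, 128, handler);
}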
2918
2919void virtio_delete_queue(VirtQueue *vq)
2920{
2921    vq->vring.num = 0;
2922    vq->vring.num_default = 0;
2923    vq->handle_output = NULL;
2924    g_free(vq->used_elems);
2925    vq->used_elems = NULL;
2926    virtio_virtqueue_reset_region_cache(vq);
2927}
2928
2929void virtio_del_queue(VirtIODevice *vdev, int n)
2930{
2931    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2932        abort();
2933    }
2934
2935    virtio_delete_queue(&vdev->vq[n]);
2936}
2937
2938static void virtio_set_isr(VirtIODevice *vdev, int value)
2939{
2940    uint8_t old = qatomic_read(&vdev->isr);
2941
2942    /* Do not write ISR if it does not change, so that its cacheline remains
2943     * shared in the common case where the guest does not read it.
2944     */
2945    if ((old & value) != value) {
2946        qatomic_or(&vdev->isr, value);
2947    }
2948}
2949
2950/* Called within rcu_read_lock(). */
2951static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2952{
2953    uint16_t old, new;
2954    bool v;
2955    /* We need to expose used array entries before checking used event. */
2956    smp_mb();
2957    /* Always notify when the queue is empty, if the feature was negotiated */
2958    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2959        !vq->inuse && virtio_queue_empty(vq)) {
2960        return true;
2961    }
2962
2963    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2964        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2965    }
2966
2967    v = vq->signalled_used_valid;
2968    vq->signalled_used_valid = true;
2969    old = vq->signalled_used;
2970    new = vq->signalled_used = vq->used_idx;
2971    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2972}
2973
2974static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2975                                    uint16_t off_wrap, uint16_t new,
2976                                    uint16_t old)
2977{
2978    int off = off_wrap & ~(1 << 15);
2979
2980    if (wrap != off_wrap >> 15) {
2981        off -= vq->vring.num;
2982    }
2983
2984    return vring_need_event(off, new, old);
2985}
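
/*
 * Worked example (illustrative): with vring.num == 256 and matching wrap
 * counters, off_wrap == 0x0005 leaves off == 5, and vring_need_event(5, new,
 * old) asks for a notification once the used index moves past entry 5.  If
 * the wrap bits differ (e.g. off_wrap == 0x8005 while the device is still on
 * wrap 0), off is shifted down by vring.num so the comparison is carried out
 * in the device's current lap of the ring.
 */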
2986
2987/* Called within rcu_read_lock(). */
2988static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2989{
2990    VRingPackedDescEvent e;
2991    uint16_t old, new;
2992    bool v;
2993    VRingMemoryRegionCaches *caches;
2994
2995    caches = vring_get_region_caches(vq);
2996    if (!caches) {
2997        return false;
2998    }
2999
3000    vring_packed_event_read(vdev, &caches->avail, &e);
3001
3002    old = vq->signalled_used;
3003    new = vq->signalled_used = vq->used_idx;
3004    v = vq->signalled_used_valid;
3005    vq->signalled_used_valid = true;
3006
3007    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
3008        return false;
3009    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
3010        return true;
3011    }
3012
3013    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
3014                                         e.off_wrap, new, old);
3015}
3016
3017/* Called within rcu_read_lock().  */
3018static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
3019{
3020    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3021        return virtio_packed_should_notify(vdev, vq);
3022    } else {
3023        return virtio_split_should_notify(vdev, vq);
3024    }
3025}
3026
3027void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
3028{
3029    WITH_RCU_READ_LOCK_GUARD() {
3030        if (!virtio_should_notify(vdev, vq)) {
3031            return;
3032        }
3033    }
3034
3035    trace_virtio_notify_irqfd(vdev, vq);
3036
3037    /*
3038     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
3039     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
3040     * incorrectly polling this bit during crashdump and hibernation
3041     * in MSI mode, causing a hang if this bit is never updated.
3042     * Recent releases of Windows do not really shut down, but rather
3043     * log out and hibernate to make the next startup faster.  Hence,
3044     * this manifested as a more serious hang during shutdown with
     * the affected drivers.
3045     *
3046     * The next driver release, from 2016, fixed this problem, so working
3047     * around it is not a must, but it's easy to do, so let's do it here.
3048     *
3049     * Note: it's safe to update ISR from any thread as it was switched
3050     * to an atomic operation.
3051     */
3052    virtio_set_isr(vq->vdev, 0x1);
3053    event_notifier_set(&vq->guest_notifier);
3054}
3055
3056static void virtio_irq(VirtQueue *vq)
3057{
3058    virtio_set_isr(vq->vdev, 0x1);
3059    virtio_notify_vector(vq->vdev, vq->vector);
3060}
3061
3062void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
3063{
3064    WITH_RCU_READ_LOCK_GUARD() {
3065        if (!virtio_should_notify(vdev, vq)) {
3066            return;
3067        }
3068    }
3069
3070    trace_virtio_notify(vdev, vq);
3071    virtio_irq(vq);
3072}
3073
3074void virtio_notify_config(VirtIODevice *vdev)
3075{
3076    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
3077        return;
    }
3078
3079    virtio_set_isr(vdev, 0x3);
3080    vdev->generation++;
3081    virtio_notify_vector(vdev, vdev->config_vector);
3082}
3083
3084static bool virtio_device_endian_needed(void *opaque)
3085{
3086    VirtIODevice *vdev = opaque;
3087
3088    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
3089    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3090        return vdev->device_endian != virtio_default_endian();
3091    }
3092    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
3093    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
3094}
3095
3096static bool virtio_64bit_features_needed(void *opaque)
3097{
3098    VirtIODevice *vdev = opaque;
3099
3100    return (vdev->host_features >> 32) != 0;
3101}
3102
3103static bool virtio_virtqueue_needed(void *opaque)
3104{
3105    VirtIODevice *vdev = opaque;
3106
3107    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
3108}
3109
3110static bool virtio_packed_virtqueue_needed(void *opaque)
3111{
3112    VirtIODevice *vdev = opaque;
3113
3114    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
3115}
3116
3117static bool virtio_ringsize_needed(void *opaque)
3118{
3119    VirtIODevice *vdev = opaque;
3120    int i;
3121
3122    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3123        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
3124            return true;
3125        }
3126    }
3127    return false;
3128}
3129
3130static bool virtio_extra_state_needed(void *opaque)
3131{
3132    VirtIODevice *vdev = opaque;
3133    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3134    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3135
3136    return k->has_extra_state &&
3137        k->has_extra_state(qbus->parent);
3138}
3139
3140static bool virtio_broken_needed(void *opaque)
3141{
3142    VirtIODevice *vdev = opaque;
3143
3144    return vdev->broken;
3145}
3146
3147static bool virtio_started_needed(void *opaque)
3148{
3149    VirtIODevice *vdev = opaque;
3150
3151    return vdev->started;
3152}
3153
3154static bool virtio_disabled_needed(void *opaque)
3155{
3156    VirtIODevice *vdev = opaque;
3157
3158    return vdev->disabled;
3159}
3160
3161static const VMStateDescription vmstate_virtqueue = {
3162    .name = "virtqueue_state",
3163    .version_id = 1,
3164    .minimum_version_id = 1,
3165    .fields = (VMStateField[]) {
3166        VMSTATE_UINT64(vring.avail, struct VirtQueue),
3167        VMSTATE_UINT64(vring.used, struct VirtQueue),
3168        VMSTATE_END_OF_LIST()
3169    }
3170};
3171
3172static const VMStateDescription vmstate_packed_virtqueue = {
3173    .name = "packed_virtqueue_state",
3174    .version_id = 1,
3175    .minimum_version_id = 1,
3176    .fields = (VMStateField[]) {
3177        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
3178        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
3179        VMSTATE_UINT16(used_idx, struct VirtQueue),
3180        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
3181        VMSTATE_UINT32(inuse, struct VirtQueue),
3182        VMSTATE_END_OF_LIST()
3183    }
3184};
3185
3186static const VMStateDescription vmstate_virtio_virtqueues = {
3187    .name = "virtio/virtqueues",
3188    .version_id = 1,
3189    .minimum_version_id = 1,
3190    .needed = &virtio_virtqueue_needed,
3191    .fields = (VMStateField[]) {
3192        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3193                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
3194        VMSTATE_END_OF_LIST()
3195    }
3196};
3197
3198static const VMStateDescription vmstate_virtio_packed_virtqueues = {
3199    .name = "virtio/packed_virtqueues",
3200    .version_id = 1,
3201    .minimum_version_id = 1,
3202    .needed = &virtio_packed_virtqueue_needed,
3203    .fields = (VMStateField[]) {
3204        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3205                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
3206        VMSTATE_END_OF_LIST()
3207    }
3208};
3209
3210static const VMStateDescription vmstate_ringsize = {
3211    .name = "ringsize_state",
3212    .version_id = 1,
3213    .minimum_version_id = 1,
3214    .fields = (VMStateField[]) {
3215        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
3216        VMSTATE_END_OF_LIST()
3217    }
3218};
3219
3220static const VMStateDescription vmstate_virtio_ringsize = {
3221    .name = "virtio/ringsize",
3222    .version_id = 1,
3223    .minimum_version_id = 1,
3224    .needed = &virtio_ringsize_needed,
3225    .fields = (VMStateField[]) {
3226        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
3227                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
3228        VMSTATE_END_OF_LIST()
3229    }
3230};
3231
3232static int get_extra_state(QEMUFile *f, void *pv, size_t size,
3233                           const VMStateField *field)
3234{
3235    VirtIODevice *vdev = pv;
3236    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3237    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3238
3239    if (!k->load_extra_state) {
3240        return -1;
3241    } else {
3242        return k->load_extra_state(qbus->parent, f);
3243    }
3244}
3245
3246static int put_extra_state(QEMUFile *f, void *pv, size_t size,
3247                           const VMStateField *field, JSONWriter *vmdesc)
3248{
3249    VirtIODevice *vdev = pv;
3250    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3251    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3252
3253    k->save_extra_state(qbus->parent, f);
3254    return 0;
3255}
3256
3257static const VMStateInfo vmstate_info_extra_state = {
3258    .name = "virtqueue_extra_state",
3259    .get = get_extra_state,
3260    .put = put_extra_state,
3261};
3262
3263static const VMStateDescription vmstate_virtio_extra_state = {
3264    .name = "virtio/extra_state",
3265    .version_id = 1,
3266    .minimum_version_id = 1,
3267    .needed = &virtio_extra_state_needed,
3268    .fields = (VMStateField[]) {
3269        {
3270            .name         = "extra_state",
3271            .version_id   = 0,
3272            .field_exists = NULL,
3273            .size         = 0,
3274            .info         = &vmstate_info_extra_state,
3275            .flags        = VMS_SINGLE,
3276            .offset       = 0,
3277        },
3278        VMSTATE_END_OF_LIST()
3279    }
3280};
3281
3282static const VMStateDescription vmstate_virtio_device_endian = {
3283    .name = "virtio/device_endian",
3284    .version_id = 1,
3285    .minimum_version_id = 1,
3286    .needed = &virtio_device_endian_needed,
3287    .fields = (VMStateField[]) {
3288        VMSTATE_UINT8(device_endian, VirtIODevice),
3289        VMSTATE_END_OF_LIST()
3290    }
3291};
3292
3293static const VMStateDescription vmstate_virtio_64bit_features = {
3294    .name = "virtio/64bit_features",
3295    .version_id = 1,
3296    .minimum_version_id = 1,
3297    .needed = &virtio_64bit_features_needed,
3298    .fields = (VMStateField[]) {
3299        VMSTATE_UINT64(guest_features, VirtIODevice),
3300        VMSTATE_END_OF_LIST()
3301    }
3302};
3303
3304static const VMStateDescription vmstate_virtio_broken = {
3305    .name = "virtio/broken",
3306    .version_id = 1,
3307    .minimum_version_id = 1,
3308    .needed = &virtio_broken_needed,
3309    .fields = (VMStateField[]) {
3310        VMSTATE_BOOL(broken, VirtIODevice),
3311        VMSTATE_END_OF_LIST()
3312    }
3313};
3314
3315static const VMStateDescription vmstate_virtio_started = {
3316    .name = "virtio/started",
3317    .version_id = 1,
3318    .minimum_version_id = 1,
3319    .needed = &virtio_started_needed,
3320    .fields = (VMStateField[]) {
3321        VMSTATE_BOOL(started, VirtIODevice),
3322        VMSTATE_END_OF_LIST()
3323    }
3324};
3325
3326static const VMStateDescription vmstate_virtio_disabled = {
3327    .name = "virtio/disabled",
3328    .version_id = 1,
3329    .minimum_version_id = 1,
3330    .needed = &virtio_disabled_needed,
3331    .fields = (VMStateField[]) {
3332        VMSTATE_BOOL(disabled, VirtIODevice),
3333        VMSTATE_END_OF_LIST()
3334    }
3335};
3336
3337static const VMStateDescription vmstate_virtio = {
3338    .name = "virtio",
3339    .version_id = 1,
3340    .minimum_version_id = 1,
3341    .fields = (VMStateField[]) {
3342        VMSTATE_END_OF_LIST()
3343    },
3344    .subsections = (const VMStateDescription*[]) {
3345        &vmstate_virtio_device_endian,
3346        &vmstate_virtio_64bit_features,
3347        &vmstate_virtio_virtqueues,
3348        &vmstate_virtio_ringsize,
3349        &vmstate_virtio_broken,
3350        &vmstate_virtio_extra_state,
3351        &vmstate_virtio_started,
3352        &vmstate_virtio_packed_virtqueues,
3353        &vmstate_virtio_disabled,
3354        NULL
3355    }
3356};
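
/*
 * Each entry above is an optional subsection: it is only put on the wire
 * when its .needed callback returns true for this device, so streams from
 * older QEMU versions that lack e.g. "virtio/packed_virtqueues" still load.
 * A .needed callback is just a predicate on the device state, roughly:
 *
 *     static bool virtio_broken_needed(void *opaque)
 *     {
 *         VirtIODevice *vdev = opaque;
 *
 *         return vdev->broken;
 *     }
 */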
3357
3358int virtio_save(VirtIODevice *vdev, QEMUFile *f)
3359{
3360    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3361    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3362    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3363    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
3364    int i;
3365
3366    if (k->save_config) {
3367        k->save_config(qbus->parent, f);
3368    }
3369
3370    qemu_put_8s(f, &vdev->status);
3371    qemu_put_8s(f, &vdev->isr);
3372    qemu_put_be16s(f, &vdev->queue_sel);
3373    qemu_put_be32s(f, &guest_features_lo);
3374    qemu_put_be32(f, vdev->config_len);
3375    qemu_put_buffer(f, vdev->config, vdev->config_len);
3376
3377    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3378        if (vdev->vq[i].vring.num == 0)
3379            break;
3380    }
3381
3382    qemu_put_be32(f, i);
3383
3384    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3385        if (vdev->vq[i].vring.num == 0)
3386            break;
3387
3388        qemu_put_be32(f, vdev->vq[i].vring.num);
3389        if (k->has_variable_vring_alignment) {
3390            qemu_put_be32(f, vdev->vq[i].vring.align);
3391        }
3392        /*
3393         * Save desc now, the rest of the ring addresses are saved in
3394         * subsections for VIRTIO-1 devices.
3395         */
3396        qemu_put_be64(f, vdev->vq[i].vring.desc);
3397        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
3398        if (k->save_queue) {
3399            k->save_queue(qbus->parent, i, f);
3400        }
3401    }
3402
3403    if (vdc->save != NULL) {
3404        vdc->save(vdev, f);
3405    }
3406
3407    if (vdc->vmsd) {
3408        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
3409        if (ret) {
3410            return ret;
3411        }
3412    }
3413
3414    /* Subsections */
3415    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
3416}
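
/*
 * Stream layout produced by virtio_save() above (and consumed by
 * virtio_load() below): transport config first, then the generic state.
 *
 *   [transport config]                      k->save_config()
 *   u8  status, u8 isr, u16 queue_sel
 *   u32 guest_features (low 32 bits; the high bits travel in a subsection)
 *   u32 config_len, followed by config_len bytes of config space
 *   u32 number of in-use virtqueues
 *   per queue: u32 vring.num, [u32 vring.align], u64 vring.desc,
 *              u16 last_avail_idx, [transport queue state]
 *   [device-specific state]                 vdc->save() and/or vdc->vmsd
 *   [vmstate_virtio subsections]
 */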
3417
3418/* A wrapper for use as a VMState .put function */
3419static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
3420                              const VMStateField *field, JSONWriter *vmdesc)
3421{
3422    return virtio_save(VIRTIO_DEVICE(opaque), f);
3423}
3424
3425/* A wrapper for use as a VMState .get function */
3426static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
3427                             const VMStateField *field)
3428{
3429    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
3430    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
3431
3432    return virtio_load(vdev, f, dc->vmsd->version_id);
3433}
3434
3435const VMStateInfo virtio_vmstate_info = {
3436    .name = "virtio",
3437    .get = virtio_device_get,
3438    .put = virtio_device_put,
3439};
3440
3441static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
3442{
3443    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3444    bool bad = (val & ~(vdev->host_features)) != 0;
3445
3446    val &= vdev->host_features;
3447    if (k->set_features) {
3448        k->set_features(vdev, val);
3449    }
3450    vdev->guest_features = val;
3451    return bad ? -1 : 0;
3452}
3453
3454int virtio_set_features(VirtIODevice *vdev, uint64_t val)
3455{
3456    int ret;
3457    /*
3458     * The driver must not attempt to set features after feature negotiation
3459     * has finished.
3460     */
3461    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
3462        return -EINVAL;
3463    }
3464
3465    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
3466        qemu_log_mask(LOG_GUEST_ERROR,
3467                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
3468                      __func__, vdev->name);
3469    }
3470
3471    ret = virtio_set_features_nocheck(vdev, val);
3472    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
3473        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
3474        int i;
3475        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3476            if (vdev->vq[i].vring.num != 0) {
3477                virtio_init_region_cache(vdev, i);
3478            }
3479        }
3480    }
3481    if (!ret) {
3482        if (!virtio_device_started(vdev, vdev->status) &&
3483            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3484            vdev->start_on_kick = true;
3485        }
3486    }
3487    return ret;
3488}
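
/*
 * virtio_set_features() is what the transport calls when the guest writes
 * its driver features (for example the virtio-pci common config or the
 * virtio-mmio DriverFeatures register); once FEATURES_OK has been set in
 * the status byte the write is refused with -EINVAL, as required above.
 */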
3489
3490size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
3491                              uint64_t host_features)
3492{
3493    size_t config_size = params->min_size;
3494    const VirtIOFeature *feature_sizes = params->feature_sizes;
3495    size_t i;
3496
3497    for (i = 0; feature_sizes[i].flags != 0; i++) {
3498        if (host_features & feature_sizes[i].flags) {
3499            config_size = MAX(feature_sizes[i].end, config_size);
3500        }
3501    }
3502
3503    assert(config_size <= params->max_size);
3504    return config_size;
3505}
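
/*
 * Typical use, sketched with made-up names: a device describes which tail
 * fields of its config space exist only when a feature bit is offered and
 * lets virtio_get_config_size() pick the resulting length.
 *
 *     static const VirtIOFeature foo_feature_sizes[] = {
 *         {.flags = 1ULL << VIRTIO_FOO_F_BAR,
 *          .end = endof(struct virtio_foo_config, bar)},
 *         {}
 *     };
 *     static const VirtIOConfigSizeParams foo_cfg_size_params = {
 *         .min_size = FOO_MIN_CONFIG_SIZE,
 *         .max_size = sizeof(struct virtio_foo_config),
 *         .feature_sizes = foo_feature_sizes,
 *     };
 *
 *     config_size = virtio_get_config_size(&foo_cfg_size_params,
 *                                          host_features);
 */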
3506
3507int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3508{
3509    int i, ret;
3510    int32_t config_len;
3511    uint32_t num;
3512    uint32_t features;
3513    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3514    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3515    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3516
3517    /*
3518     * We poison the endianness to ensure it does not get used before
3519     * subsections have been loaded.
3520     */
3521    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3522
3523    if (k->load_config) {
3524        ret = k->load_config(qbus->parent, f);
3525        if (ret)
3526            return ret;
3527    }
3528
3529    qemu_get_8s(f, &vdev->status);
3530    qemu_get_8s(f, &vdev->isr);
3531    qemu_get_be16s(f, &vdev->queue_sel);
3532    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3533        return -1;
3534    }
3535    qemu_get_be32s(f, &features);
3536
3537    /*
3538     * Temporarily set guest_features low bits - needed by
3539     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3540     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3541     *
3542     * Note: devices should always test host features in future - don't create
3543     * new dependencies like this.
3544     */
3545    vdev->guest_features = features;
3546
3547    config_len = qemu_get_be32(f);
3548
3549    /*
3550     * There are cases where the incoming config can be bigger or smaller
3551     * than what we have; so load what we have space for, and skip
3552     * any excess that's in the stream.
3553     */
3554    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3555
3556    while (config_len > vdev->config_len) {
3557        qemu_get_byte(f);
3558        config_len--;
3559    }
3560
3561    num = qemu_get_be32(f);
3562
3563    if (num > VIRTIO_QUEUE_MAX) {
3564        error_report("Invalid number of virtqueues: 0x%x", num);
3565        return -1;
3566    }
3567
3568    for (i = 0; i < num; i++) {
3569        vdev->vq[i].vring.num = qemu_get_be32(f);
3570        if (k->has_variable_vring_alignment) {
3571            vdev->vq[i].vring.align = qemu_get_be32(f);
3572        }
3573        vdev->vq[i].vring.desc = qemu_get_be64(f);
3574        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3575        vdev->vq[i].signalled_used_valid = false;
3576        vdev->vq[i].notification = true;
3577
3578        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3579            error_report("VQ %d address 0x0 "
3580                         "inconsistent with Host index 0x%x",
3581                         i, vdev->vq[i].last_avail_idx);
3582            return -1;
3583        }
3584        if (k->load_queue) {
3585            ret = k->load_queue(qbus->parent, i, f);
3586            if (ret)
3587                return ret;
3588        }
3589    }
3590
3591    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3592
3593    if (vdc->load != NULL) {
3594        ret = vdc->load(vdev, f, version_id);
3595        if (ret) {
3596            return ret;
3597        }
3598    }
3599
3600    if (vdc->vmsd) {
3601        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3602        if (ret) {
3603            return ret;
3604        }
3605    }
3606
3607    /* Subsections */
3608    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3609    if (ret) {
3610        return ret;
3611    }
3612
3613    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3614        vdev->device_endian = virtio_default_endian();
3615    }
3616
3617    if (virtio_64bit_features_needed(vdev)) {
3618        /*
3619         * Subsection load filled vdev->guest_features.  Run them
3620         * through virtio_set_features_nocheck() to sanity-check them against
3621         * host_features.
3622         */
3623        uint64_t features64 = vdev->guest_features;
3624        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3625            error_report("Features 0x%" PRIx64 " unsupported. "
3626                         "Allowed features: 0x%" PRIx64,
3627                         features64, vdev->host_features);
3628            return -1;
3629        }
3630    } else {
3631        if (virtio_set_features_nocheck(vdev, features) < 0) {
3632            error_report("Features 0x%x unsupported. "
3633                         "Allowed features: 0x%" PRIx64,
3634                         features, vdev->host_features);
3635            return -1;
3636        }
3637    }
3638
3639    if (!virtio_device_started(vdev, vdev->status) &&
3640        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3641        vdev->start_on_kick = true;
3642    }
3643
3644    RCU_READ_LOCK_GUARD();
3645    for (i = 0; i < num; i++) {
3646        if (vdev->vq[i].vring.desc) {
3647            uint16_t nheads;
3648
3649            /*
3650             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3651             * only the region cache needs to be set up.  Legacy devices need
3652             * to calculate used and avail ring addresses based on the desc
3653             * address.
3654             */
3655            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3656                virtio_init_region_cache(vdev, i);
3657            } else {
3658                virtio_queue_update_rings(vdev, i);
3659            }
3660
3661            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3662                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3663                vdev->vq[i].shadow_avail_wrap_counter =
3664                                        vdev->vq[i].last_avail_wrap_counter;
3665                continue;
3666            }
3667
3668            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3669            /* Check it isn't doing strange things with descriptor numbers. */
3670            if (nheads > vdev->vq[i].vring.num) {
3671                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3672                             "inconsistent with Host index 0x%x: delta 0x%x",
3673                             i, vdev->vq[i].vring.num,
3674                             vring_avail_idx(&vdev->vq[i]),
3675                             vdev->vq[i].last_avail_idx, nheads);
3676                vdev->vq[i].used_idx = 0;
3677                vdev->vq[i].shadow_avail_idx = 0;
3678                vdev->vq[i].inuse = 0;
3679                continue;
3680            }
3681            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3682            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3683
3684            /*
3685             * Some devices migrate VirtQueueElements that have been popped
3686             * from the avail ring but not yet returned to the used ring.
3687             * Since max ring size < UINT16_MAX it's safe to use modulo
3688             * UINT16_MAX + 1 subtraction.
3689             */
3690            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3691                                vdev->vq[i].used_idx);
3692            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3693                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3694                             "used_idx 0x%x",
3695                             i, vdev->vq[i].vring.num,
3696                             vdev->vq[i].last_avail_idx,
3697                             vdev->vq[i].used_idx);
3698                return -1;
3699            }
3700        }
3701    }
3702
3703    if (vdc->post_load) {
3704        ret = vdc->post_load(vdev);
3705        if (ret) {
3706            return ret;
3707        }
3708    }
3709
3710    return 0;
3711}
3712
3713void virtio_cleanup(VirtIODevice *vdev)
3714{
3715    qemu_del_vm_change_state_handler(vdev->vmstate);
3716}
3717
3718static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3719{
3720    VirtIODevice *vdev = opaque;
3721    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3722    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3723    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3724    vdev->vm_running = running;
3725
3726    if (backend_run) {
3727        virtio_set_status(vdev, vdev->status);
3728    }
3729
3730    if (k->vmstate_change) {
3731        k->vmstate_change(qbus->parent, backend_run);
3732    }
3733
3734    if (!backend_run) {
3735        virtio_set_status(vdev, vdev->status);
3736    }
3737}
3738
3739void virtio_instance_init_common(Object *proxy_obj, void *data,
3740                                 size_t vdev_size, const char *vdev_name)
3741{
3742    DeviceState *vdev = data;
3743
3744    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3745                                       vdev_size, vdev_name, &error_abort,
3746                                       NULL);
3747    qdev_alias_all_properties(vdev, proxy_obj);
3748}
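
/*
 * The callers are the transport proxy objects; a proxy's instance_init is
 * typically along these lines (illustrative names, modelled on the
 * virtio-pci proxies):
 *
 *     static void virtio_foo_pci_instance_init(Object *obj)
 *     {
 *         VirtIOFooPCI *dev = VIRTIO_FOO_PCI(obj);
 *
 *         virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 *                                     TYPE_VIRTIO_FOO);
 *     }
 */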
3749
3750void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3751{
3752    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3753    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3754    int i;
3755    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3756
3757    if (nvectors) {
3758        vdev->vector_queues =
3759            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3760    }
3761
3762    vdev->start_on_kick = false;
3763    vdev->started = false;
3764    vdev->vhost_started = false;
3765    vdev->device_id = device_id;
3766    vdev->status = 0;
3767    qatomic_set(&vdev->isr, 0);
3768    vdev->queue_sel = 0;
3769    vdev->config_vector = VIRTIO_NO_VECTOR;
3770    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3771    vdev->vm_running = runstate_is_running();
3772    vdev->broken = false;
3773    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3774        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3775        vdev->vq[i].vdev = vdev;
3776        vdev->vq[i].queue_index = i;
3777        vdev->vq[i].host_notifier_enabled = false;
3778    }
3779
3780    vdev->name = virtio_id_to_name(device_id);
3781    vdev->config_len = config_size;
3782    if (vdev->config_len) {
3783        vdev->config = g_malloc0(config_size);
3784    } else {
3785        vdev->config = NULL;
3786    }
3787    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3788            virtio_vmstate_change, vdev);
3789    vdev->device_endian = virtio_default_endian();
3790    vdev->use_guest_notifier_mask = true;
3791}
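
/*
 * A device's realize hook calls virtio_init() before wiring up its
 * virtqueues; a minimal sketch, with a hypothetical device "foo":
 *
 *     virtio_init(vdev, VIRTIO_ID_FOO, sizeof(struct virtio_foo_config));
 *     vfoo->req_vq = virtio_add_queue(vdev, 128, virtio_foo_handle_req);
 */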
3792
3793/*
3794 * Only devices that existed before the virtio standard was defined can
3795 * support legacy mode; this also covers devices that were never added to
3796 * the standard. All newer devices conform only to the virtio standard.
3797 */
3798bool virtio_legacy_allowed(VirtIODevice *vdev)
3799{
3800    switch (vdev->device_id) {
3801    case VIRTIO_ID_NET:
3802    case VIRTIO_ID_BLOCK:
3803    case VIRTIO_ID_CONSOLE:
3804    case VIRTIO_ID_RNG:
3805    case VIRTIO_ID_BALLOON:
3806    case VIRTIO_ID_RPMSG:
3807    case VIRTIO_ID_SCSI:
3808    case VIRTIO_ID_9P:
3809    case VIRTIO_ID_RPROC_SERIAL:
3810    case VIRTIO_ID_CAIF:
3811        return true;
3812    default:
3813        return false;
3814    }
3815}
3816
3817bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3818{
3819    return vdev->disable_legacy_check;
3820}
3821
3822hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3823{
3824    return vdev->vq[n].vring.desc;
3825}
3826
3827bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3828{
3829    return virtio_queue_get_desc_addr(vdev, n) != 0;
3830}
3831
3832bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3833{
3834    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3835    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3836
3837    if (k->queue_enabled) {
3838        return k->queue_enabled(qbus->parent, n);
3839    }
3840    return virtio_queue_enabled_legacy(vdev, n);
3841}
3842
3843hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3844{
3845    return vdev->vq[n].vring.avail;
3846}
3847
3848hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3849{
3850    return vdev->vq[n].vring.used;
3851}
3852
3853hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3854{
3855    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3856}
3857
3858hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3859{
3860    int s;
3861
3862    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3863        return sizeof(struct VRingPackedDescEvent);
3864    }
3865
3866    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3867    return offsetof(VRingAvail, ring) +
3868        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3869}
3870
3871hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3872{
3873    int s;
3874
3875    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3876        return sizeof(struct VRingPackedDescEvent);
3877    }
3878
3879    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3880    return offsetof(VRingUsed, ring) +
3881        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3882}
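
/*
 * The split-ring sizes above follow the virtio spec layout: 16 bytes per
 * descriptor, a 4-byte header plus 2 bytes per entry for the avail ring,
 * and a 4-byte header plus 8 bytes per entry for the used ring, each with
 * an extra 2-byte event field when VIRTIO_RING_F_EVENT_IDX is negotiated.
 */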
3883
3884static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3885                                                           int n)
3886{
3887    unsigned int avail, used;
3888
3889    avail = vdev->vq[n].last_avail_idx;
3890    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3891
3892    used = vdev->vq[n].used_idx;
3893    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3894
3895    return avail | used << 16;
3896}
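
/*
 * Packed queues thus report their position as one 32-bit word:
 *   bits  0-14 last_avail_idx   bit 15 last_avail_wrap_counter
 *   bits 16-30 used_idx         bit 31 used_wrap_counter
 * virtio_queue_packed_set_last_avail_idx() below unpacks the same layout.
 */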
3897
3898static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3899                                                      int n)
3900{
3901    return vdev->vq[n].last_avail_idx;
3902}
3903
3904unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3905{
3906    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3907        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3908    } else {
3909        return virtio_queue_split_get_last_avail_idx(vdev, n);
3910    }
3911}
3912
3913static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3914                                                   int n, unsigned int idx)
3915{
3916    struct VirtQueue *vq = &vdev->vq[n];
3917
3918    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3919    vq->last_avail_wrap_counter =
3920        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3921    idx >>= 16;
3922    vq->used_idx = idx & 0x7fff;
3923    vq->used_wrap_counter = !!(idx & 0x8000);
3924}
3925
3926static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3927                                                  int n, unsigned int idx)
3928{
3929    vdev->vq[n].last_avail_idx = idx;
3930    vdev->vq[n].shadow_avail_idx = idx;
3931}
3932
3933void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3934                                     unsigned int idx)
3935{
3936    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3937        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3938    } else {
3939        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3940    }
3941}
3942
3943static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3944                                                       int n)
3945{
3946    /* There is no index in guest memory to restore last_avail_idx from */
3947    return;
3948}
3949
3950static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3951                                                      int n)
3952{
3953    RCU_READ_LOCK_GUARD();
3954    if (vdev->vq[n].vring.desc) {
3955        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3956        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3957    }
3958}
3959
3960void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3961{
3962    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3963        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3964    } else {
3965        virtio_queue_split_restore_last_avail_idx(vdev, n);
3966    }
3967}
3968
3969static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3970{
3971    /* used idx was updated through set_last_avail_idx() */
3972    return;
3973}
3974
3975static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3976{
3977    RCU_READ_LOCK_GUARD();
3978    if (vdev->vq[n].vring.desc) {
3979        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3980    }
3981}
3982
3983void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3984{
3985    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3986        return virtio_queue_packed_update_used_idx(vdev, n);
3987    } else {
3988        return virtio_queue_split_update_used_idx(vdev, n);
3989    }
3990}
3991
3992void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3993{
3994    vdev->vq[n].signalled_used_valid = false;
3995}
3996
3997VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3998{
3999    return vdev->vq + n;
4000}
4001
4002uint16_t virtio_get_queue_index(VirtQueue *vq)
4003{
4004    return vq->queue_index;
4005}
4006
4007static void virtio_queue_guest_notifier_read(EventNotifier *n)
4008{
4009    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
4010    if (event_notifier_test_and_clear(n)) {
4011        virtio_irq(vq);
4012    }
4013}
4014
4015void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
4016                                                bool with_irqfd)
4017{
4018    if (assign && !with_irqfd) {
4019        event_notifier_set_handler(&vq->guest_notifier,
4020                                   virtio_queue_guest_notifier_read);
4021    } else {
4022        event_notifier_set_handler(&vq->guest_notifier, NULL);
4023    }
4024    if (!assign) {
4025        /* Test and clear notifier before closing it,
4026         * in case poll callback didn't have time to run. */
4027        virtio_queue_guest_notifier_read(&vq->guest_notifier);
4028    }
4029}
4030
4031EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
4032{
4033    return &vq->guest_notifier;
4034}
4035
4036static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
4037{
4038    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4039
4040    virtio_queue_set_notification(vq, 0);
4041}
4042
4043static bool virtio_queue_host_notifier_aio_poll(void *opaque)
4044{
4045    EventNotifier *n = opaque;
4046    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4047
4048    return vq->vring.desc && !virtio_queue_empty(vq);
4049}
4050
4051static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
4052{
4053    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4054
4055    virtio_queue_notify_vq(vq);
4056}
4057
4058static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
4059{
4060    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4061
4062    /* Caller polls once more after this to catch requests that race with us */
4063    virtio_queue_set_notification(vq, 1);
4064}
4065
4066void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
4067{
4068    aio_set_event_notifier(ctx, &vq->host_notifier, true,
4069                           virtio_queue_host_notifier_read,
4070                           virtio_queue_host_notifier_aio_poll,
4071                           virtio_queue_host_notifier_aio_poll_ready);
4072    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
4073                                virtio_queue_host_notifier_aio_poll_begin,
4074                                virtio_queue_host_notifier_aio_poll_end);
4075}
4076
4077/*
4078 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
4079 * this for rx virtqueues and similar cases where the virtqueue handler
4080 * function does not pop all elements. When the virtqueue is left non-empty
4081 * polling consumes CPU cycles and should not be used.
4082 */
4083void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
4084{
4085    aio_set_event_notifier(ctx, &vq->host_notifier, true,
4086                           virtio_queue_host_notifier_read,
4087                           NULL, NULL);
4088}
4089
4090void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
4091{
4092    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
4093    /* Test and clear notifier after disabling the event handler,
4094     * in case the poll callback didn't have time to run. */
4095    virtio_queue_host_notifier_read(&vq->host_notifier);
4096}
4097
4098void virtio_queue_host_notifier_read(EventNotifier *n)
4099{
4100    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
4101    if (event_notifier_test_and_clear(n)) {
4102        virtio_queue_notify_vq(vq);
4103    }
4104}
4105
4106EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
4107{
4108    return &vq->host_notifier;
4109}
4110
4111void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
4112{
4113    vq->host_notifier_enabled = enabled;
4114}
4115
4116int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
4117                                      MemoryRegion *mr, bool assign)
4118{
4119    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4120    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
4121
4122    if (k->set_host_notifier_mr) {
4123        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
4124    }
4125
4126    return -1;
4127}
4128
4129void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
4130{
4131    g_free(vdev->bus_name);
4132    vdev->bus_name = g_strdup(bus_name);
4133}
4134
4135void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
4136{
4137    va_list ap;
4138
4139    va_start(ap, fmt);
4140    error_vreport(fmt, ap);
4141    va_end(ap);
4142
4143    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
4144        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
4145        virtio_notify_config(vdev);
4146    }
4147
4148    vdev->broken = true;
4149}
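
/*
 * Virtqueue and device code calls virtio_error() instead of aborting when
 * guest-controlled state turns out to be inconsistent: the device is marked
 * broken and, for VIRTIO 1.x drivers, NEEDS_RESET is signalled.  A caller
 * sketch with made-up names:
 *
 *     if (desc_len > max_len) {
 *         virtio_error(vdev, "Invalid descriptor length %u", desc_len);
 *         return;
 *     }
 */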
4150
4151static void virtio_memory_listener_commit(MemoryListener *listener)
4152{
4153    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
4154    int i;
4155
4156    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4157        if (vdev->vq[i].vring.num == 0) {
4158            break;
4159        }
4160        virtio_init_region_cache(vdev, i);
4161    }
4162}
4163
4164static void virtio_device_realize(DeviceState *dev, Error **errp)
4165{
4166    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4167    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4168    Error *err = NULL;
4169
4170    /* Devices should either use vmsd or the load/save methods */
4171    assert(!vdc->vmsd || !vdc->load);
4172
4173    if (vdc->realize != NULL) {
4174        vdc->realize(dev, &err);
4175        if (err != NULL) {
4176            error_propagate(errp, err);
4177            return;
4178        }
4179    }
4180
4181    virtio_bus_device_plugged(vdev, &err);
4182    if (err != NULL) {
4183        error_propagate(errp, err);
4184        vdc->unrealize(dev);
4185        return;
4186    }
4187
4188    vdev->listener.commit = virtio_memory_listener_commit;
4189    vdev->listener.name = "virtio";
4190    memory_listener_register(&vdev->listener, vdev->dma_as);
4191    QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
4192}
4193
4194static void virtio_device_unrealize(DeviceState *dev)
4195{
4196    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4197    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4198
4199    memory_listener_unregister(&vdev->listener);
4200    virtio_bus_device_unplugged(vdev);
4201
4202    if (vdc->unrealize != NULL) {
4203        vdc->unrealize(dev);
4204    }
4205
4206    QTAILQ_REMOVE(&virtio_list, vdev, next);
4207    g_free(vdev->bus_name);
4208    vdev->bus_name = NULL;
4209}
4210
4211static void virtio_device_free_virtqueues(VirtIODevice *vdev)
4212{
4213    int i;
4214    if (!vdev->vq) {
4215        return;
4216    }
4217
4218    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4219        if (vdev->vq[i].vring.num == 0) {
4220            break;
4221        }
4222        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
4223    }
4224    g_free(vdev->vq);
4225}
4226
4227static void virtio_device_instance_finalize(Object *obj)
4228{
4229    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
4230
4231    virtio_device_free_virtqueues(vdev);
4232
4233    g_free(vdev->config);
4234    g_free(vdev->vector_queues);
4235}
4236
4237static Property virtio_properties[] = {
4238    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
4239    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
4240    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
4241    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
4242                     disable_legacy_check, false),
4243    DEFINE_PROP_END_OF_LIST(),
4244};
4245
4246static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
4247{
4248    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4249    int i, n, r, err;
4250
4251    /*
4252     * Batch all the host notifiers in a single transaction to avoid
4253     * quadratic time complexity in address_space_update_ioeventfds().
4254     */
4255    memory_region_transaction_begin();
4256    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4257        VirtQueue *vq = &vdev->vq[n];
4258        if (!virtio_queue_get_num(vdev, n)) {
4259            continue;
4260        }
4261        r = virtio_bus_set_host_notifier(qbus, n, true);
4262        if (r < 0) {
4263            err = r;
4264            goto assign_error;
4265        }
4266        event_notifier_set_handler(&vq->host_notifier,
4267                                   virtio_queue_host_notifier_read);
4268    }
4269
4270    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4271        /* Kick right away to begin processing requests already in vring */
4272        VirtQueue *vq = &vdev->vq[n];
4273        if (!vq->vring.num) {
4274            continue;
4275        }
4276        event_notifier_set(&vq->host_notifier);
4277    }
4278    memory_region_transaction_commit();
4279    return 0;
4280
4281assign_error:
4282    i = n; /* save n for a second iteration after transaction is committed. */
4283    while (--n >= 0) {
4284        VirtQueue *vq = &vdev->vq[n];
4285        if (!virtio_queue_get_num(vdev, n)) {
4286            continue;
4287        }
4288
4289        event_notifier_set_handler(&vq->host_notifier, NULL);
4290        r = virtio_bus_set_host_notifier(qbus, n, false);
4291        assert(r >= 0);
4292    }
4293    /*
4294     * The transaction expects the ioeventfds to be open when it
4295     * commits. Do it now, before the cleanup loop.
4296     */
4297    memory_region_transaction_commit();
4298
4299    while (--i >= 0) {
4300        if (!virtio_queue_get_num(vdev, i)) {
4301            continue;
4302        }
4303        virtio_bus_cleanup_host_notifier(qbus, i);
4304    }
4305    return err;
4306}
4307
4308int virtio_device_start_ioeventfd(VirtIODevice *vdev)
4309{
4310    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4311    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4312
4313    return virtio_bus_start_ioeventfd(vbus);
4314}
4315
4316static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
4317{
4318    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4319    int n, r;
4320
4321    /*
4322     * Batch all the host notifiers in a single transaction to avoid
4323     * quadratic time complexity in address_space_update_ioeventfds().
4324     */
4325    memory_region_transaction_begin();
4326    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4327        VirtQueue *vq = &vdev->vq[n];
4328
4329        if (!virtio_queue_get_num(vdev, n)) {
4330            continue;
4331        }
4332        event_notifier_set_handler(&vq->host_notifier, NULL);
4333        r = virtio_bus_set_host_notifier(qbus, n, false);
4334        assert(r >= 0);
4335    }
4336    /*
4337     * The transaction expects the ioeventfds to be open when it
4338     * commits. Do it now, before the cleanup loop.
4339     */
4340    memory_region_transaction_commit();
4341
4342    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4343        if (!virtio_queue_get_num(vdev, n)) {
4344            continue;
4345        }
4346        virtio_bus_cleanup_host_notifier(qbus, n);
4347    }
4348}
4349
4350int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
4351{
4352    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4353    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4354
4355    return virtio_bus_grab_ioeventfd(vbus);
4356}
4357
4358void virtio_device_release_ioeventfd(VirtIODevice *vdev)
4359{
4360    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4361    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4362
4363    virtio_bus_release_ioeventfd(vbus);
4364}
4365
4366static void virtio_device_class_init(ObjectClass *klass, void *data)
4367{
4368    /* Set the default value here. */
4369    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4370    DeviceClass *dc = DEVICE_CLASS(klass);
4371
4372    dc->realize = virtio_device_realize;
4373    dc->unrealize = virtio_device_unrealize;
4374    dc->bus_type = TYPE_VIRTIO_BUS;
4375    device_class_set_props(dc, virtio_properties);
4376    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
4377    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
4378
4379    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
4380
4381    QTAILQ_INIT(&virtio_list);
4382}
4383
4384bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
4385{
4386    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4387    VirtioBusState *vbus = VIRTIO_BUS(qbus);
4388
4389    return virtio_bus_ioeventfd_enabled(vbus);
4390}
4391
4392VirtioInfoList *qmp_x_query_virtio(Error **errp)
4393{
4394    VirtioInfoList *list = NULL;
4395    VirtioInfoList *node;
4396    VirtIODevice *vdev;
4397
4398    QTAILQ_FOREACH(vdev, &virtio_list, next) {
4399        DeviceState *dev = DEVICE(vdev);
4400        Error *err = NULL;
4401        QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
4402
4403        if (err == NULL) {
4404            GString *is_realized = qobject_to_json_pretty(obj, true);
4405            /* virtio device is NOT realized, remove it from list */
4406            if (!strncmp(is_realized->str, "false", 4)) {
4407                QTAILQ_REMOVE(&virtio_list, vdev, next);
4408            } else {
4409                node = g_new0(VirtioInfoList, 1);
4410                node->value = g_new(VirtioInfo, 1);
4411                node->value->path = g_strdup(dev->canonical_path);
4412                node->value->name = g_strdup(vdev->name);
4413                QAPI_LIST_PREPEND(list, node->value);
4414            }
4415            g_string_free(is_realized, true);
4416        }
4417        qobject_unref(obj);
4418    }
4419
4420    return list;
4421}
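
/*
 * This backs the "x-query-virtio" QMP command; an illustrative exchange,
 * with a made-up QOM path:
 *
 *   -> { "execute": "x-query-virtio" }
 *   <- { "return": [ { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                      "name": "virtio-net" } ] }
 */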
4422
4423static VirtIODevice *virtio_device_find(const char *path)
4424{
4425    VirtIODevice *vdev;
4426
4427    QTAILQ_FOREACH(vdev, &virtio_list, next) {
4428        DeviceState *dev = DEVICE(vdev);
4429
4430        if (strcmp(dev->canonical_path, path) != 0) {
4431            continue;
4432        }
4433
4434        Error *err = NULL;
4435        QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
4436        if (err == NULL) {
4437            GString *is_realized = qobject_to_json_pretty(obj, true);
4438            /* virtio device is NOT realized, remove it from list */
4439            if (!strncmp(is_realized->str, "false", 4)) {
4440                g_string_free(is_realized, true);
4441                qobject_unref(obj);
4442                QTAILQ_REMOVE(&virtio_list, vdev, next);
4443                return NULL;
4444            }
4445            g_string_free(is_realized, true);
4446        } else {
4447            /* virtio device doesn't exist in QOM tree */
4448            QTAILQ_REMOVE(&virtio_list, vdev, next);
4449            qobject_unref(obj);
4450            return NULL;
4451        }
4452        /* device exists in QOM tree & is realized */
4453        qobject_unref(obj);
4454        return vdev;
4455    }
4456    return NULL;
4457}
4458
4459#define CONVERT_FEATURES(type, map, is_status, bitmap)   \
4460    ({                                                   \
4461        type *list = NULL;                               \
4462        type *node;                                      \
4463        for (i = 0; map[i].virtio_bit != -1; i++) {      \
4464            if (is_status) {                             \
4465                bit = map[i].virtio_bit;                 \
4466            }                                            \
4467            else {                                       \
4468                bit = 1ULL << map[i].virtio_bit;         \
4469            }                                            \
4470            if ((bitmap & bit) == 0) {                   \
4471                continue;                                \
4472            }                                            \
4473            node = g_new0(type, 1);                      \
4474            node->value = g_strdup(map[i].feature_desc); \
4475            node->next = list;                           \
4476            list = node;                                 \
4477            bitmap ^= bit;                               \
4478        }                                                \
4479        list;                                            \
4480    })
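
/*
 * CONVERT_FEATURES is a statement expression that evaluates to a list of
 * descriptions for every bit of 'bitmap' that appears in 'map'.  It relies
 * on 'i' and 'bit' being declared by the caller, and it clears each decoded
 * bit from 'bitmap' (bitmap ^= bit), so whatever remains afterwards can be
 * reported as unknown bits by the helpers below.
 */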
4481
4482static VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap)
4483{
4484    VirtioDeviceStatus *status;
4485    uint8_t bit;
4486    int i;
4487
4488    status = g_new0(VirtioDeviceStatus, 1);
4489    status->statuses = CONVERT_FEATURES(strList, virtio_config_status_map,
4490                                        1, bitmap);
4491    status->has_unknown_statuses = bitmap != 0;
4492    if (status->has_unknown_statuses) {
4493        status->unknown_statuses = bitmap;
4494    }
4495
4496    return status;
4497}
4498
4499static VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap)
4500{
4501    VhostDeviceProtocols *vhu_protocols;
4502    uint64_t bit;
4503    int i;
4504
4505    vhu_protocols = g_new0(VhostDeviceProtocols, 1);
4506    vhu_protocols->protocols =
4507                    CONVERT_FEATURES(strList,
4508                                     vhost_user_protocol_map, 0, bitmap);
4509    vhu_protocols->has_unknown_protocols = bitmap != 0;
4510    if (vhu_protocols->has_unknown_protocols) {
4511        vhu_protocols->unknown_protocols = bitmap;
4512    }
4513
4514    return vhu_protocols;
4515}
4516
4517static VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id,
4518                                                 uint64_t bitmap)
4519{
4520    VirtioDeviceFeatures *features;
4521    uint64_t bit;
4522    int i;
4523
4524    features = g_new0(VirtioDeviceFeatures, 1);
4525    features->has_dev_features = true;
4526
4527    /* transport features */
4528    features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0,
4529                                            bitmap);
4530
4531    /* device features */
4532    switch (device_id) {
4533#ifdef CONFIG_VIRTIO_SERIAL
4534    case VIRTIO_ID_CONSOLE:
4535        features->dev_features =
4536            CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap);
4537        break;
4538#endif
4539#ifdef CONFIG_VIRTIO_BLK
4540    case VIRTIO_ID_BLOCK:
4541        features->dev_features =
4542            CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap);
4543        break;
4544#endif
4545#ifdef CONFIG_VIRTIO_GPU
4546    case VIRTIO_ID_GPU:
4547        features->dev_features =
4548            CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap);
4549        break;
4550#endif
4551#ifdef CONFIG_VIRTIO_NET
4552    case VIRTIO_ID_NET:
4553        features->dev_features =
4554            CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap);
4555        break;
4556#endif
4557#ifdef CONFIG_VIRTIO_SCSI
4558    case VIRTIO_ID_SCSI:
4559        features->dev_features =
4560            CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap);
4561        break;
4562#endif
4563#ifdef CONFIG_VIRTIO_BALLOON
4564    case VIRTIO_ID_BALLOON:
4565        features->dev_features =
4566            CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap);
4567        break;
4568#endif
4569#ifdef CONFIG_VIRTIO_IOMMU
4570    case VIRTIO_ID_IOMMU:
4571        features->dev_features =
4572            CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap);
4573        break;
4574#endif
4575#ifdef CONFIG_VIRTIO_INPUT
4576    case VIRTIO_ID_INPUT:
4577        features->dev_features =
4578            CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap);
4579        break;
4580#endif
4581#ifdef CONFIG_VHOST_USER_FS
4582    case VIRTIO_ID_FS:
4583        features->dev_features =
4584            CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap);
4585        break;
4586#endif
4587#ifdef CONFIG_VHOST_VSOCK
4588    case VIRTIO_ID_VSOCK:
4589        features->dev_features =
4590            CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap);
4591        break;
4592#endif
4593#ifdef CONFIG_VIRTIO_CRYPTO
4594    case VIRTIO_ID_CRYPTO:
4595        features->dev_features =
4596            CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap);
4597        break;
4598#endif
4599#ifdef CONFIG_VIRTIO_MEM
4600    case VIRTIO_ID_MEM:
4601        features->dev_features =
4602            CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap);
4603        break;
4604#endif
4605#ifdef CONFIG_VIRTIO_I2C_ADAPTER
4606    case VIRTIO_ID_I2C_ADAPTER:
4607        features->dev_features =
4608            CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap);
4609        break;
4610#endif
4611#ifdef CONFIG_VIRTIO_RNG
4612    case VIRTIO_ID_RNG:
4613        features->dev_features =
4614            CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap);
4615        break;
4616#endif
4617    /* No features */
4618    case VIRTIO_ID_9P:
4619    case VIRTIO_ID_PMEM:
4620    case VIRTIO_ID_IOMEM:
4621    case VIRTIO_ID_RPMSG:
4622    case VIRTIO_ID_CLOCK:
4623    case VIRTIO_ID_MAC80211_WLAN:
4624    case VIRTIO_ID_MAC80211_HWSIM:
4625    case VIRTIO_ID_RPROC_SERIAL:
4626    case VIRTIO_ID_MEMORY_BALLOON:
4627    case VIRTIO_ID_CAIF:
4628    case VIRTIO_ID_SIGNAL_DIST:
4629    case VIRTIO_ID_PSTORE:
4630    case VIRTIO_ID_SOUND:
4631    case VIRTIO_ID_BT:
4632    case VIRTIO_ID_RPMB:
4633    case VIRTIO_ID_VIDEO_ENCODER:
4634    case VIRTIO_ID_VIDEO_DECODER:
4635    case VIRTIO_ID_SCMI:
4636    case VIRTIO_ID_NITRO_SEC_MOD:
4637    case VIRTIO_ID_WATCHDOG:
4638    case VIRTIO_ID_CAN:
4639    case VIRTIO_ID_DMABUF:
4640    case VIRTIO_ID_PARAM_SERV:
4641    case VIRTIO_ID_AUDIO_POLICY:
4642    case VIRTIO_ID_GPIO:
4643        break;
4644    default:
4645        g_assert_not_reached();
4646    }
4647
4648    features->has_unknown_dev_features = bitmap != 0;
4649    if (features->has_unknown_dev_features) {
4650        features->unknown_dev_features = bitmap;
4651    }
4652
4653    return features;
4654}
4655
4656VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
4657{
4658    VirtIODevice *vdev;
4659    VirtioStatus *status;
4660
4661    vdev = virtio_device_find(path);
4662    if (vdev == NULL) {
4663        error_setg(errp, "Path %s is not a VirtIODevice", path);
4664        return NULL;
4665    }
4666
4667    status = g_new0(VirtioStatus, 1);
4668    status->name = g_strdup(vdev->name);
4669    status->device_id = vdev->device_id;
4670    status->vhost_started = vdev->vhost_started;
4671    status->guest_features = qmp_decode_features(vdev->device_id,
4672                                                 vdev->guest_features);
4673    status->host_features = qmp_decode_features(vdev->device_id,
4674                                                vdev->host_features);
4675    status->backend_features = qmp_decode_features(vdev->device_id,
4676                                                   vdev->backend_features);
4677
4678    switch (vdev->device_endian) {
4679    case VIRTIO_DEVICE_ENDIAN_LITTLE:
4680        status->device_endian = g_strdup("little");
4681        break;
4682    case VIRTIO_DEVICE_ENDIAN_BIG:
4683        status->device_endian = g_strdup("big");
4684        break;
4685    default:
4686        status->device_endian = g_strdup("unknown");
4687        break;
4688    }
4689
4690    status->num_vqs = virtio_get_num_queues(vdev);
4691    status->status = qmp_decode_status(vdev->status);
4692    status->isr = vdev->isr;
4693    status->queue_sel = vdev->queue_sel;
4694    status->vm_running = vdev->vm_running;
4695    status->broken = vdev->broken;
4696    status->disabled = vdev->disabled;
4697    status->use_started = vdev->use_started;
4698    status->started = vdev->started;
4699    status->start_on_kick = vdev->start_on_kick;
4700    status->disable_legacy_check = vdev->disable_legacy_check;
4701    status->bus_name = g_strdup(vdev->bus_name);
4702    status->use_guest_notifier_mask = vdev->use_guest_notifier_mask;
4703    status->has_vhost_dev = vdev->vhost_started;
4704
4705    if (vdev->vhost_started) {
4706        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4707        struct vhost_dev *hdev = vdc->get_vhost(vdev);
4708
4709        status->vhost_dev = g_new0(VhostStatus, 1);
4710        status->vhost_dev->n_mem_sections = hdev->n_mem_sections;
4711        status->vhost_dev->n_tmp_sections = hdev->n_tmp_sections;
4712        status->vhost_dev->nvqs = hdev->nvqs;
4713        status->vhost_dev->vq_index = hdev->vq_index;
4714        status->vhost_dev->features =
4715            qmp_decode_features(vdev->device_id, hdev->features);
4716        status->vhost_dev->acked_features =
4717            qmp_decode_features(vdev->device_id, hdev->acked_features);
4718        status->vhost_dev->backend_features =
4719            qmp_decode_features(vdev->device_id, hdev->backend_features);
4720        status->vhost_dev->protocol_features =
4721            qmp_decode_protocols(hdev->protocol_features);
4722        status->vhost_dev->max_queues = hdev->max_queues;
4723        status->vhost_dev->backend_cap = hdev->backend_cap;
4724        status->vhost_dev->log_enabled = hdev->log_enabled;
4725        status->vhost_dev->log_size = hdev->log_size;
4726    }
4727
4728    return status;
4729}
4730
4731VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path,
4732                                                            uint16_t queue,
4733                                                            Error **errp)
4734{
4735    VirtIODevice *vdev;
4736    VirtVhostQueueStatus *status;
4737
4738    vdev = virtio_device_find(path);
4739    if (vdev == NULL) {
4740        error_setg(errp, "Path %s is not a VirtIODevice", path);
4741        return NULL;
4742    }
4743
4744    if (!vdev->vhost_started) {
4745        error_setg(errp, "vhost device has not started yet");
4746        return NULL;
4747    }
4748
4749    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4750    struct vhost_dev *hdev = vdc->get_vhost(vdev);
4751
4752    if (queue < hdev->vq_index || queue >= hdev->vq_index + hdev->nvqs) {
4753        error_setg(errp, "Invalid vhost virtqueue number %d", queue);
4754        return NULL;
4755    }
4756
4757    status = g_new0(VirtVhostQueueStatus, 1);
4758    status->name = g_strdup(vdev->name);
4759    status->kick = hdev->vqs[queue - hdev->vq_index].kick;
4760    status->call = hdev->vqs[queue - hdev->vq_index].call;
4761    status->desc = (uintptr_t)hdev->vqs[queue - hdev->vq_index].desc;
4762    status->avail = (uintptr_t)hdev->vqs[queue - hdev->vq_index].avail;
4763    status->used = (uintptr_t)hdev->vqs[queue - hdev->vq_index].used;
4764    status->num = hdev->vqs[queue - hdev->vq_index].num;
4765    status->desc_phys = hdev->vqs[queue - hdev->vq_index].desc_phys;
4766    status->desc_size = hdev->vqs[queue - hdev->vq_index].desc_size;
4767    status->avail_phys = hdev->vqs[queue - hdev->vq_index].avail_phys;
4768    status->avail_size = hdev->vqs[queue - hdev->vq_index].avail_size;
4769    status->used_phys = hdev->vqs[queue - hdev->vq_index].used_phys;
4770    status->used_size = hdev->vqs[queue - hdev->vq_index].used_size;
4771
4772    return status;
4773}
4774
4775VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
4776                                                 uint16_t queue,
4777                                                 Error **errp)
4778{
4779    VirtIODevice *vdev;
4780    VirtQueueStatus *status;
4781
4782    vdev = virtio_device_find(path);
4783    if (vdev == NULL) {
4784        error_setg(errp, "Path %s is not a VirtIODevice", path);
4785        return NULL;
4786    }
4787
4788    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4789        error_setg(errp, "Invalid virtqueue number %d", queue);
4790        return NULL;
4791    }
4792
4793    status = g_new0(VirtQueueStatus, 1);
4794    status->name = g_strdup(vdev->name);
4795    status->queue_index = vdev->vq[queue].queue_index;
4796    status->inuse = vdev->vq[queue].inuse;
4797    status->vring_num = vdev->vq[queue].vring.num;
4798    status->vring_num_default = vdev->vq[queue].vring.num_default;
4799    status->vring_align = vdev->vq[queue].vring.align;
4800    status->vring_desc = vdev->vq[queue].vring.desc;
4801    status->vring_avail = vdev->vq[queue].vring.avail;
4802    status->vring_used = vdev->vq[queue].vring.used;
4803    status->used_idx = vdev->vq[queue].used_idx;
4804    status->signalled_used = vdev->vq[queue].signalled_used;
4805    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
4806
4807    if (vdev->vhost_started) {
4808        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4809        struct vhost_dev *hdev = vdc->get_vhost(vdev);
4810
4811        /* Check whether this vq index is also handled by the vhost device */
4812        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4813            status->has_last_avail_idx = true;
4814
4815            int vhost_vq_index =
4816                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4817            struct vhost_vring_state state = {
4818                .index = vhost_vq_index,
4819            };
4820
4821            hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4822            status->last_avail_idx = state.num;
4823        }
4824    } else {
4825        status->has_shadow_avail_idx = true;
4826        status->has_last_avail_idx = true;
4827        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4828        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4829    }
4830
4831    return status;
4832}
4833
4834static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4835{
4836    strList *list = NULL;
4837    strList *node;
4838    int i;
4839
4840    struct {
4841        uint16_t flag;
4842        const char *value;
4843    } map[] = {
4844        { VRING_DESC_F_NEXT, "next" },
4845        { VRING_DESC_F_WRITE, "write" },
4846        { VRING_DESC_F_INDIRECT, "indirect" },
4847        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4848        { 1 << VRING_PACKED_DESC_F_USED, "used" },
4849        { 0, "" }
4850    };
4851
4852    for (i = 0; map[i].flag; i++) {
4853        if ((map[i].flag & flags) == 0) {
4854            continue;
4855        }
4856        node = g_malloc0(sizeof(strList));
4857        node->value = g_strdup(map[i].value);
4858        node->next = list;
4859        list = node;
4860    }
4861
4862    return list;
4863}
4864
4865VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4866                                                     uint16_t queue,
4867                                                     bool has_index,
4868                                                     uint16_t index,
4869                                                     Error **errp)
4870{
4871    VirtIODevice *vdev;
4872    VirtQueue *vq;
4873    VirtioQueueElement *element = NULL;
4874
4875    vdev = virtio_device_find(path);
4876    if (vdev == NULL) {
4877        error_setg(errp, "Path %s is not a VirtIO device", path);
4878        return NULL;
4879    }
4880
4881    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4882        error_setg(errp, "Invalid virtqueue number %d", queue);
4883        return NULL;
4884    }
4885    vq = &vdev->vq[queue];
4886
4887    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4888        error_setg(errp, "Packed ring not supported");
4889        return NULL;
4890    } else {
4891        unsigned int head, i, max;
4892        VRingMemoryRegionCaches *caches;
4893        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
4894        MemoryRegionCache *desc_cache;
4895        VRingDesc desc;
4896        VirtioRingDescList *list = NULL;
4897        VirtioRingDescList *node;
4898        int rc, ndescs;
4899
4900        RCU_READ_LOCK_GUARD();
4901
4902        max = vq->vring.num;
4903
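            /*
             * Pick the avail ring slot to decode: the caller-supplied index,
             * or the next entry the device has yet to process.
             */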
4904        if (!has_index) {
4905            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4906        } else {
4907            head = vring_avail_ring(vq, index % vq->vring.num);
4908        }
4909        i = head;
4910
4911        caches = vring_get_region_caches(vq);
4912        if (!caches) {
4913            error_setg(errp, "Region caches not initialized");
4914            return NULL;
4915        }
4916        if (caches->desc.len < max * sizeof(VRingDesc)) {
4917            error_setg(errp, "Cannot map descriptor ring");
4918            return NULL;
4919        }
4920
4921        desc_cache = &caches->desc;
4922        vring_split_desc_read(vdev, &desc, desc_cache, i);
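            /*
             * An indirect descriptor points at a separate table of
             * descriptors; map that table and walk it instead.
             */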
4923        if (desc.flags & VRING_DESC_F_INDIRECT) {
4924            int64_t len;
4925            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4926                                           desc.addr, desc.len, false);
4927            desc_cache = &indirect_desc_cache;
4928            if (len < desc.len) {
4929                error_setg(errp, "Cannot map indirect buffer");
4930                goto done;
4931            }
4932
4933            max = desc.len / sizeof(VRingDesc);
4934            i = 0;
4935            vring_split_desc_read(vdev, &desc, desc_cache, i);
4936        }
4937
4938        element = g_new0(VirtioQueueElement, 1);
4939        element->avail = g_new0(VirtioRingAvail, 1);
4940        element->used = g_new0(VirtioRingUsed, 1);
4941        element->name = g_strdup(vdev->name);
4942        element->index = head;
4943        element->avail->flags = vring_avail_flags(vq);
4944        element->avail->idx = vring_avail_idx(vq);
4945        element->avail->ring = head;
4946        element->used->flags = vring_used_flags(vq);
4947        element->used->idx = vring_used_idx(vq);
4948        ndescs = 0;
4949
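            /*
             * Walk the descriptor chain, prepending one QAPI node per
             * descriptor (so the list ends up in reverse chain order).
             */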
4950        do {
4951            /* A buggy driver may produce an infinite loop */
4952            if (ndescs >= max) {
4953                break;
4954            }
4955            node = g_new0(VirtioRingDescList, 1);
4956            node->value = g_new0(VirtioRingDesc, 1);
4957            node->value->addr = desc.addr;
4958            node->value->len = desc.len;
4959            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4960            node->next = list;
4961            list = node;
4962
4963            ndescs++;
4964            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4965                                                max, &i);
4966        } while (rc == VIRTQUEUE_READ_DESC_MORE);
4967        element->descs = list;
4968done:
4969        address_space_cache_destroy(&indirect_desc_cache);
4970    }
4971
4972    return element;
4973}
4974
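    /*
     * QOM registration of the abstract TYPE_VIRTIO_DEVICE base type that
     * concrete virtio devices derive from.
     */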
4975static const TypeInfo virtio_device_info = {
4976    .name = TYPE_VIRTIO_DEVICE,
4977    .parent = TYPE_DEVICE,
4978    .instance_size = sizeof(VirtIODevice),
4979    .class_init = virtio_device_class_init,
4980    .instance_finalize = virtio_device_instance_finalize,
4981    .abstract = true,
4982    .class_size = sizeof(VirtioDeviceClass),
4983};
4984
4985static void virtio_register_types(void)
4986{
4987    type_register_static(&virtio_device_info);
4988}
4989
4990type_init(virtio_register_types)
4991