qemu/hw/virtio/virtio.c
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-virtio.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "hw/core/cpu.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/vhost.h"
#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"
#include "sysemu/runstate.h"
#include "virtio-qmp.h"

#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/vhost_types.h"
#include "standard-headers/linux/virtio_blk.h"
#include "standard-headers/linux/virtio_console.h"
#include "standard-headers/linux/virtio_gpu.h"
#include "standard-headers/linux/virtio_net.h"
#include "standard-headers/linux/virtio_scsi.h"
#include "standard-headers/linux/virtio_i2c.h"
#include "standard-headers/linux/virtio_balloon.h"
#include "standard-headers/linux/virtio_iommu.h"
#include "standard-headers/linux/virtio_mem.h"
#include "standard-headers/linux/virtio_vsock.h"

QmpVirtIODeviceList virtio_list;

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
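
/*
 * Layout of a split vring in guest memory, as computed by
 * virtio_queue_update_rings() below (illustrative arithmetic):
 *
 *   avail = desc + num * sizeof(VRingDesc)
 *   used  = ALIGN(avail + offsetof(VRingAvail, ring[num]), align)
 *
 * e.g. with num = 256 and align = 4096, the descriptor table takes
 * 256 * 16 = 4096 bytes, the avail ring starts at offset 4096 and takes
 * 4 + 2 * 256 = 516 bytes, and the used ring starts at the next aligned
 * offset, 8192.
 */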

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingPackedDesc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags;
} VRingPackedDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;

typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;
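
/*
 * Each part of the vring is accessed through a MemoryRegionCache so the
 * guest-physical-to-host translation is done once per ring rather than
 * once per access.  The cache set is rebuilt when the ring addresses
 * change and the old set is reclaimed through call_rcu(), so readers
 * under rcu_read_lock() always see a consistent set.
 */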

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
    VRingMemoryRegionCaches *caches;
} VRing;

typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent;

struct VirtQueue
{
    VRing vring;
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};

const char *virtio_device_names[] = {
    [VIRTIO_ID_NET] = "virtio-net",
    [VIRTIO_ID_BLOCK] = "virtio-blk",
    [VIRTIO_ID_CONSOLE] = "virtio-serial",
    [VIRTIO_ID_RNG] = "virtio-rng",
    [VIRTIO_ID_BALLOON] = "virtio-balloon",
    [VIRTIO_ID_IOMEM] = "virtio-iomem",
    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
    [VIRTIO_ID_SCSI] = "virtio-scsi",
    [VIRTIO_ID_9P] = "virtio-9p",
    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
    [VIRTIO_ID_CAIF] = "virtio-caif",
    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
    [VIRTIO_ID_GPU] = "virtio-gpu",
    [VIRTIO_ID_CLOCK] = "virtio-clk",
    [VIRTIO_ID_INPUT] = "virtio-input",
    [VIRTIO_ID_VSOCK] = "vhost-vsock",
    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
    [VIRTIO_ID_PSTORE] = "virtio-pstore",
    [VIRTIO_ID_IOMMU] = "virtio-iommu",
    [VIRTIO_ID_MEM] = "virtio-mem",
    [VIRTIO_ID_SOUND] = "virtio-sound",
    [VIRTIO_ID_FS] = "virtio-user-fs",
    [VIRTIO_ID_PMEM] = "virtio-pmem",
    [VIRTIO_ID_RPMB] = "virtio-rpmb",
    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
    [VIRTIO_ID_SCMI] = "virtio-scmi",
    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
    [VIRTIO_ID_CAN] = "virtio-can",
    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
    [VIRTIO_ID_BT] = "virtio-bluetooth",
    [VIRTIO_ID_GPIO] = "virtio-gpio"
};

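/*
 * Map a virtio device ID to the canonical QEMU device name,
 * e.g. virtio_id_to_name(VIRTIO_ID_NET) yields "virtio-net".
 */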
static const char *virtio_id_to_name(uint16_t device_id)
{
    assert(device_id < G_N_ELEMENTS(virtio_device_names));
    const char *name = virtio_device_names[device_id];
    assert(name != NULL);
    return name;
}

/* Called within call_rcu().  */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
    assert(caches != NULL);
    address_space_cache_destroy(&caches->desc);
    address_space_cache_destroy(&caches->avail);
    address_space_cache_destroy(&caches->used);
    g_free(caches);
}

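/*
 * Publish a NULL cache pointer so new readers find nothing, then defer
 * freeing the old set until all current RCU readers are done with it.
 */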
static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;

    caches = qatomic_read(&vq->vring.caches);
    qatomic_rcu_set(&vq->vring.caches, NULL);
    if (caches) {
        call_rcu(caches, virtio_free_region_cache, rcu);
    }
}

static void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new = NULL;
    hwaddr addr, size;
    int64_t len;
    bool packed;

    addr = vq->vring.desc;
    if (!addr) {
        goto out_no_cache;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, packed);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    size = virtio_queue_get_used_size(vdev, n);
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    size = virtio_queue_get_avail_size(vdev, n);
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    qatomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

err_avail:
    address_space_cache_destroy(&new->avail);
err_used:
    address_space_cache_destroy(&new->used);
err_desc:
    address_space_cache_destroy(&new->desc);
out_no_cache:
    g_free(new);
    virtio_virtqueue_reset_region_cache(vq);
}

/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->num || !vring->desc || !vring->align) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
    virtio_init_region_cache(vdev, n);
}

/* Called within rcu_read_lock().  */
static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                                  MemoryRegionCache *cache, int i)
{
    address_space_read_cached(cache, i * sizeof(VRingDesc),
                              desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static void vring_packed_event_read(VirtIODevice *vdev,
                                    MemoryRegionCache *cache,
                                    VRingPackedDescEvent *e)
{
    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);

    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
    /* Make sure flags is seen before off_wrap */
    smp_rmb();
    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
    virtio_tswap16s(vdev, &e->flags);
}

static void vring_packed_off_wrap_write(VirtIODevice *vdev,
                                        MemoryRegionCache *cache,
                                        uint16_t off_wrap)
{
    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);

    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
}

static void vring_packed_flags_write(VirtIODevice *vdev,
                                     MemoryRegionCache *cache, uint16_t flags)
{
    hwaddr off = offsetof(VRingPackedDescEvent, flags);

    virtio_stw_phys_cached(vdev, cache, off, flags);
    address_space_cache_invalidate(cache, off, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
    return qatomic_rcu_read(&vq->vring.caches);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, idx);

    if (!caches) {
        return 0;
    }

    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
    return vq->shadow_avail_idx;
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, ring[i]);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

/* Called within rcu_read_lock().  */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, ring[i]);

    if (!caches) {
        return;
    }

    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}

/* Called within rcu_read_lock(). */
static inline uint16_t vring_used_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}

/* Called within rcu_read_lock().  */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}

/* Called within rcu_read_lock().  */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (caches) {
        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
    }

    vq->used_idx = val;
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches;
    hwaddr pa;

    if (!vq->notification) {
        return;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    pa = offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}

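/*
 * With VIRTIO_RING_F_EVENT_IDX negotiated, the device suppresses guest
 * notifications by publishing the avail index it wants to be kicked at;
 * otherwise it toggles VRING_USED_F_NO_NOTIFY in the used ring flags.
 */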
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
    RCU_READ_LOCK_GUARD();

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

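/*
 * Packed rings use the event suppression structure instead: flags select
 * ENABLE, DISABLE or (with EVENT_IDX) DESC mode, where off_wrap encodes
 * the descriptor offset in bits 0-14 and the wrap counter in bit 15.
 */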
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

bool virtio_queue_get_notification(VirtQueue *vq)
{
    return vq->notification;
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (!vq->vring.desc) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_notification(vq, enable);
    } else {
        virtio_queue_split_set_notification(vq, enable);
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                                         uint16_t *flags,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    *flags = virtio_lduw_phys_cached(vdev, cache, off);
}

static void vring_packed_desc_read(VirtIODevice *vdev,
                                   VRingPackedDesc *desc,
                                   MemoryRegionCache *cache,
                                   int i, bool strict_order)
{
    hwaddr off = i * sizeof(VRingPackedDesc);

    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);

    if (strict_order) {
        /* Make sure flags is read before the rest of the fields. */
        smp_rmb();
    }

    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
                              &desc->addr, sizeof(desc->addr));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
                              &desc->id, sizeof(desc->id));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
                              &desc->len, sizeof(desc->len));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap16s(vdev, &desc->id);
    virtio_tswap32s(vdev, &desc->len);
}

static void vring_packed_desc_write_data(VirtIODevice *vdev,
                                         VRingPackedDesc *desc,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off_id = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, id);
    hwaddr off_len = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, len);

    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->id);
    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
}

static void vring_packed_desc_write_flags(VirtIODevice *vdev,
                                          VRingPackedDesc *desc,
                                          MemoryRegionCache *cache,
                                          int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
}

static void vring_packed_desc_write(VirtIODevice *vdev,
                                    VRingPackedDesc *desc,
                                    MemoryRegionCache *cache,
                                    int i, bool strict_order)
{
    vring_packed_desc_write_data(vdev, desc, cache, i);
    if (strict_order) {
        /* Make sure data is written before flags. */
        smp_wmb();
    }
    vring_packed_desc_write_flags(vdev, desc, cache, i);
}

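/*
 * A packed-ring descriptor is available when its AVAIL bit differs from
 * its USED bit and AVAIL matches the consumer's wrap counter: on the
 * first pass (wrap counter 1) the driver publishes AVAIL=1/USED=0 and
 * the device completes with AVAIL=1/USED=1.
 */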
static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
{
    bool avail, used;

    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
    return (avail != used) && (avail == wrap_counter);
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers.
 * Called within rcu_read_lock().  */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static int virtio_queue_split_empty(VirtQueue *vq)
{
    bool empty;

    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    RCU_READ_LOCK_GUARD();
    empty = vring_avail_idx(vq) == vq->last_avail_idx;
    return empty;
}

/* Called within rcu_read_lock().  */
static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
{
    struct VRingPackedDesc desc;
    VRingMemoryRegionCaches *cache;

    if (unlikely(!vq->vring.desc)) {
        return 1;
    }

    cache = vring_get_region_caches(vq);
    if (!cache) {
        return 1;
    }

    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
                                 vq->last_avail_idx);

    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
}

static int virtio_queue_packed_empty(VirtQueue *vq)
{
    RCU_READ_LOCK_GUARD();
    return virtio_queue_packed_empty_rcu(vq);
}

int virtio_queue_empty(VirtQueue *vq)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_empty(vq);
    } else {
        return virtio_queue_split_empty(vq);
    }
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *dma_as = vq->vdev->dma_as;
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
    }
}

/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse -= elem->ndescs;
    virtqueue_unmap_sg(vq, elem, len);
}

static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
{
    vq->last_avail_idx -= num;
}

static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
{
    if (vq->last_avail_idx < num) {
        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
        vq->last_avail_wrap_counter ^= 1;
    } else {
        vq->last_avail_idx -= num;
    }
}

/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, 1);
    } else {
        virtqueue_split_rewind(vq, 1);
    }

    virtqueue_detach_element(vq, elem, len);
}

/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }

    vq->inuse -= num;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, num);
    } else {
        virtqueue_split_rewind(vq, num);
    }
    return true;
}

static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                  unsigned int len, unsigned int idx)
{
    vq->used_elems[idx].index = elem->index;
    vq->used_elems[idx].len = len;
    vq->used_elems[idx].ndescs = elem->ndescs;
}

static void virtqueue_packed_fill_desc(VirtQueue *vq,
                                       const VirtQueueElement *elem,
                                       unsigned int idx,
                                       bool strict_order)
{
    uint16_t head;
    VRingMemoryRegionCaches *caches;
    VRingPackedDesc desc = {
        .id = elem->index,
        .len = elem->len,
    };
    bool wrap_counter = vq->used_wrap_counter;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    head = vq->used_idx + idx;
    if (head >= vq->vring.num) {
        head -= vq->vring.num;
        wrap_counter ^= 1;
    }
    if (wrap_counter) {
        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
    } else {
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}

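/*
 * virtqueue_fill() and virtqueue_flush() decompose virtqueue_push() so
 * that a device can complete a batch of elements and publish them with a
 * single used index update (illustrative sketch, not from any one
 * device):
 *
 *     RCU_READ_LOCK_GUARD();
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 */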
/* Called within rcu_read_lock().  */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (virtio_device_disabled(vq->vdev)) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_fill(vq, elem, len, idx);
    } else {
        virtqueue_split_fill(vq, elem, len, idx);
    }
}

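/*
 * The signalled_used test below is free-running 16-bit arithmetic:
 * (int16_t)(new - signalled_used) < (uint16_t)(new - old) holds exactly
 * when signalled_used lies in the window (old, new] just published,
 * e.g. old = 65535, new = 1, signalled_used = 0: 1 < 2, so the cached
 * value has been passed and must be invalidated.
 */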
/* Called within rcu_read_lock().  */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
{
    unsigned int i, ndescs = 0;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    for (i = 1; i < count; i++) {
        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
        ndescs += vq->used_elems[i].ndescs;
    }
    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
    ndescs += vq->used_elems[0].ndescs;

    vq->inuse -= ndescs;
    vq->used_idx += ndescs;
    if (vq->used_idx >= vq->vring.num) {
        vq->used_idx -= vq->vring.num;
        vq->used_wrap_counter ^= 1;
        vq->signalled_used_valid = false;
    }
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    if (virtio_device_disabled(vq->vdev)) {
        vq->inuse -= count;
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_flush(vq, count);
    } else {
        virtqueue_split_flush(vq, count);
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    RCU_READ_LOCK_GUARD();
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

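/*
 * The avail index is a free-running 16-bit counter, so the subtraction
 * below stays correct across wraparound: avail_idx = 2 with
 * last_avail_idx = 65534 yields 4 pending heads.
 */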
/* Called within rcu_read_lock().  */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

/* Called within rcu_read_lock().  */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};

static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off the end of the descriptor table. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}

/* Called within rcu_read_lock().  */
static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
                            unsigned int *in_bytes, unsigned int *out_bytes,
                            unsigned max_in_bytes, unsigned max_out_bytes,
                            VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    int64_t len = 0;
    int rc;

    idx = vq->last_avail_idx;
    total_bufs = in_total = out_total = 0;

    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        MemoryRegionCache *desc_cache = &caches->desc;
        unsigned int num_bufs;
        VRingDesc desc;
        unsigned int i;
        unsigned int max = vq->vring.num;

        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        vring_split_desc_read(vdev, &desc, desc_cache, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingDesc);
            num_bufs = i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
        } else {
            total_bufs = num_bufs;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

static int virtqueue_packed_read_next_desc(VirtQueue *vq,
                                           VRingPackedDesc *desc,
                                           MemoryRegionCache
                                           *desc_cache,
                                           unsigned int max,
                                           unsigned int *next,
                                           bool indirect)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    ++*next;
    if (*next == max) {
        if (indirect) {
            return VIRTQUEUE_READ_DESC_DONE;
        } else {
            (*next) -= vq->vring.num;
        }
    }

    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
    return VIRTQUEUE_READ_DESC_MORE;
}

/* Called within rcu_read_lock().  */
static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
                                             unsigned int *in_bytes,
                                             unsigned int *out_bytes,
                                             unsigned max_in_bytes,
                                             unsigned max_out_bytes,
                                             VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache *desc_cache;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    int64_t len = 0;
    VRingPackedDesc desc;
    bool wrap_counter;

    idx = vq->last_avail_idx;
    wrap_counter = vq->last_avail_wrap_counter;
    total_bufs = in_total = out_total = 0;

    for (;;) {
        unsigned int num_bufs = total_bufs;
        unsigned int i = idx;
        int rc;
        unsigned int max = vq->vring.num;

        desc_cache = &caches->desc;

        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
        if (!is_desc_avail(desc.flags, wrap_counter)) {
            break;
        }

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingPackedDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingPackedDesc);
            num_bufs = i = 0;
            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
                                                 &i, desc_cache ==
                                                 &indirect_desc_cache);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
            idx++;
        } else {
            idx += num_bufs - total_bufs;
            total_bufs = num_bufs;
        }

        if (idx >= vq->vring.num) {
            idx -= vq->vring.num;
            wrap_counter ^= 1;
        }
    }

    /* Record the index and wrap counter for a kick we want */
    vq->shadow_avail_idx = idx;
    vq->shadow_avail_wrap_counter = wrap_counter;
done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    uint16_t desc_size;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();

    if (unlikely(!vq->vring.desc)) {
        goto err;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        goto err;
    }

    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
    if (caches->desc.len < vq->vring.num * desc_size) {
        virtio_error(vq->vdev, "Cannot map descriptor ring");
        goto err;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
                                         max_in_bytes, max_out_bytes,
                                         caches);
    } else {
        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
                                        max_in_bytes, max_out_bytes,
                                        caches);
    }

    return;
err:
    if (in_bytes) {
        *in_bytes = 0;
    }
    if (out_bytes) {
        *out_bytes = 0;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE,
                                              MEMTXATTRS_UNSPECIFIED);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}

static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                hwaddr *addr, unsigned int num_sg,
                                bool is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = dma_memory_map(vdev->dma_as,
                                        addr[i], &len, is_write ?
                                        DMA_DIRECTION_FROM_DEVICE :
                                        DMA_DIRECTION_TO_DEVICE,
                                        MEMTXATTRS_UNSPECIFIED);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

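/*
 * Re-establish host mappings for an element whose guest addresses and
 * lengths were restored externally, e.g. from the migration stream.
 */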
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
                        false);
}

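/*
 * A VirtQueueElement and its four variable-size arrays are carved out of
 * a single allocation, laid out in this order:
 *
 *   [VirtQueueElement][in_addr[in_num]][out_addr[out_num]]
 *   [in_sg[in_num]][out_sg[out_num]]
 *
 * with each array aligned to its element type.
 */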
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are neither input nor output buffers. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}

static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingPackedDesc desc;
    uint16_t id;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_packed_empty_rcu(vq)) {
        goto done;
    }

    /* When we start there are neither input nor output buffers. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    i = vq->last_avail_idx;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
1663        virtio_error(vdev, "Cannot map descriptor ring");
1664        goto done;
1665    }
1666
1667    desc_cache = &caches->desc;
1668    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1669    id = desc.id;
1670    if (desc.flags & VRING_DESC_F_INDIRECT) {
1671        if (desc.len % sizeof(VRingPackedDesc)) {
1672            virtio_error(vdev, "Invalid size for indirect buffer table");
1673            goto done;
1674        }
1675
1676        /* loop over the indirect descriptor table */
1677        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1678                                       desc.addr, desc.len, false);
1679        desc_cache = &indirect_desc_cache;
1680        if (len < desc.len) {
1681            virtio_error(vdev, "Cannot map indirect buffer");
1682            goto done;
1683        }
1684
1685        max = desc.len / sizeof(VRingPackedDesc);
1686        i = 0;
1687        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1688    }
1689
1690    /* Collect all the descriptors */
1691    do {
1692        bool map_ok;
1693
1694        if (desc.flags & VRING_DESC_F_WRITE) {
1695            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1696                                        iov + out_num,
1697                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1698                                        desc.addr, desc.len);
1699        } else {
1700            if (in_num) {
1701                virtio_error(vdev, "Incorrect order for descriptors");
1702                goto err_undo_map;
1703            }
1704            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1705                                        VIRTQUEUE_MAX_SIZE, false,
1706                                        desc.addr, desc.len);
1707        }
1708        if (!map_ok) {
1709            goto err_undo_map;
1710        }
1711
1712        /* If we've got too many, that implies a descriptor loop. */
1713        if (++elem_entries > max) {
1714            virtio_error(vdev, "Looped descriptor");
1715            goto err_undo_map;
1716        }
1717
1718        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1719                                             desc_cache ==
1720                                             &indirect_desc_cache);
1721    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1722
1723    /* Now copy what we have collected and mapped */
1724    elem = virtqueue_alloc_element(sz, out_num, in_num);
1725    for (i = 0; i < out_num; i++) {
1726        elem->out_addr[i] = addr[i];
1727        elem->out_sg[i] = iov[i];
1728    }
1729    for (i = 0; i < in_num; i++) {
1730        elem->in_addr[i] = addr[out_num + i];
1731        elem->in_sg[i] = iov[out_num + i];
1732    }
1733
1734    elem->index = id;
1735    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1736    vq->last_avail_idx += elem->ndescs;
1737    vq->inuse += elem->ndescs;
1738
1739    if (vq->last_avail_idx >= vq->vring.num) {
1740        vq->last_avail_idx -= vq->vring.num;
1741        vq->last_avail_wrap_counter ^= 1;
1742    }
1743
1744    vq->shadow_avail_idx = vq->last_avail_idx;
1745    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1746
1747    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1748done:
1749    address_space_cache_destroy(&indirect_desc_cache);
1750
1751    return elem;
1752
1753err_undo_map:
1754    virtqueue_undo_map_desc(out_num, in_num, iov);
1755    goto done;
1756}
1757
1758void *virtqueue_pop(VirtQueue *vq, size_t sz)
1759{
1760    if (virtio_device_disabled(vq->vdev)) {
1761        return NULL;
1762    }
1763
1764    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1765        return virtqueue_packed_pop(vq, sz);
1766    } else {
1767        return virtqueue_split_pop(vq, sz);
1768    }
1769}
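
/*
 * Illustrative sketch, not part of this file's API: a typical device
 * handle_output callback drains the ring with virtqueue_pop(), fills in any
 * device-writable buffers, then completes each element with virtqueue_push()
 * and raises an interrupt.  The function name is hypothetical; real devices
 * usually pass the size of a wrapper struct whose first field is a
 * VirtQueueElement.
 */
#if 0
static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;  /* ring empty, or device broken/disabled */
        }
        /* Request data is in elem->out_sg[]; responses go to elem->in_sg[]. */
        virtqueue_push(vq, elem, 0 /* bytes written to in_sg */);
        g_free(elem);
    }
    virtio_notify(vdev, vq);
}
#endif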
1770
1771static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1772{
1773    VRingMemoryRegionCaches *caches;
1774    MemoryRegionCache *desc_cache;
1775    unsigned int dropped = 0;
1776    VirtQueueElement elem = {};
1777    VirtIODevice *vdev = vq->vdev;
1778    VRingPackedDesc desc;
1779
1780    RCU_READ_LOCK_GUARD();
1781
1782    caches = vring_get_region_caches(vq);
1783    if (!caches) {
1784        return 0;
1785    }
1786
1787    desc_cache = &caches->desc;
1788
1789    virtio_queue_set_notification(vq, 0);
1790
1791    while (vq->inuse < vq->vring.num) {
1792        unsigned int idx = vq->last_avail_idx;
1793        /*
1794         * Works like virtqueue_pop() but does not map buffers
1795         * and does not allocate any memory.
1796         */
1797        vring_packed_desc_read(vdev, &desc, desc_cache,
1798                               vq->last_avail_idx, true);
1799        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1800            break;
1801        }
1802        elem.index = desc.id;
1803        elem.ndescs = 1;
1804        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1805                                               vq->vring.num, &idx, false)) {
1806            ++elem.ndescs;
1807        }
1808        /*
1809         * Immediately push the element; there is nothing to unmap
1810         * as both in_num and out_num are set to 0.
1811         */
1812        virtqueue_push(vq, &elem, 0);
1813        dropped++;
1814        vq->last_avail_idx += elem.ndescs;
1815        if (vq->last_avail_idx >= vq->vring.num) {
1816            vq->last_avail_idx -= vq->vring.num;
1817            vq->last_avail_wrap_counter ^= 1;
1818        }
1819    }
1820
1821    return dropped;
1822}
1823
1824static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1825{
1826    unsigned int dropped = 0;
1827    VirtQueueElement elem = {};
1828    VirtIODevice *vdev = vq->vdev;
1829    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1830
1831    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1832        /* Works like virtqueue_pop() but does not map buffers
1833         * and does not allocate any memory. */
1834        smp_rmb();
1835        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1836            break;
1837        }
1838        vq->inuse++;
1839        vq->last_avail_idx++;
1840        if (fEventIdx) {
1841            vring_set_avail_event(vq, vq->last_avail_idx);
1842        }
1843        /* Immediately push the element; there is nothing to unmap
1844         * as both in_num and out_num are set to 0. */
1845        virtqueue_push(vq, &elem, 0);
1846        dropped++;
1847    }
1848
1849    return dropped;
1850}
1851
1852/* virtqueue_drop_all:
1853 * @vq: The #VirtQueue
1854 * Drops all queued buffers and indicates them to the guest
1855 * as if they were done. Useful when buffers cannot be
1856 * processed but must be returned to the guest.
1857 */
1858unsigned int virtqueue_drop_all(VirtQueue *vq)
1859{
1860    struct VirtIODevice *vdev = vq->vdev;
1861
1862    if (virtio_device_disabled(vq->vdev)) {
1863        return 0;
1864    }
1865
1866    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1867        return virtqueue_packed_drop_all(vq);
1868    } else {
1869        return virtqueue_split_drop_all(vq);
1870    }
1871}
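
/*
 * Usage sketch (hypothetical caller): a network device whose backend cannot
 * transmit may return all pending tx buffers to the guest instead of
 * stalling the ring:
 *
 *     if (backend_down) {
 *         virtqueue_drop_all(vq);
 *     }
 */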
1872
1873/* Reading and writing a structure directly to QEMUFile is *awful*, but
1874 * it is what QEMU has always done by mistake.  We can change it sooner
1875 * or later by bumping the version number of the affected vm states.
1876 * In the meanwhile, since the in-memory layout of VirtQueueElement
1877 * has changed, we need to marshal to and from the layout that was
1878 * used before the change.
1879 */
1880typedef struct VirtQueueElementOld {
1881    unsigned int index;
1882    unsigned int out_num;
1883    unsigned int in_num;
1884    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1885    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1886    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1887    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1888} VirtQueueElementOld;
1889
1890void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1891{
1892    VirtQueueElement *elem;
1893    VirtQueueElementOld data;
1894    int i;
1895
1896    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897
1898    /* TODO: teach all callers that this can fail, and return failure instead
1899     * of asserting here.
1900     * This is just one thing (there are probably more) that must be
1901     * fixed before we can allow NDEBUG compilation.
1902     */
1903    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1904    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1905
1906    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1907    elem->index = data.index;
1908
1909    for (i = 0; i < elem->in_num; i++) {
1910        elem->in_addr[i] = data.in_addr[i];
1911    }
1912
1913    for (i = 0; i < elem->out_num; i++) {
1914        elem->out_addr[i] = data.out_addr[i];
1915    }
1916
1917    for (i = 0; i < elem->in_num; i++) {
1918        /* Base is overwritten by virtqueue_map.  */
1919        elem->in_sg[i].iov_base = 0;
1920        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1921    }
1922
1923    for (i = 0; i < elem->out_num; i++) {
1924        /* Base is overwritten by virtqueue_map.  */
1925        elem->out_sg[i].iov_base = 0;
1926        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1927    }
1928
1929    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1930        qemu_get_be32s(f, &elem->ndescs);
1931    }
1932
1933    virtqueue_map(vdev, elem);
1934    return elem;
1935}
1936
1937void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1938                                VirtQueueElement *elem)
1939{
1940    VirtQueueElementOld data;
1941    int i;
1942
1943    memset(&data, 0, sizeof(data));
1944    data.index = elem->index;
1945    data.in_num = elem->in_num;
1946    data.out_num = elem->out_num;
1947
1948    for (i = 0; i < elem->in_num; i++) {
1949        data.in_addr[i] = elem->in_addr[i];
1950    }
1951
1952    for (i = 0; i < elem->out_num; i++) {
1953        data.out_addr[i] = elem->out_addr[i];
1954    }
1955
1956    for (i = 0; i < elem->in_num; i++) {
1957        /* Base is overwritten by virtqueue_map when loading.  Do not
1958         * save it, as it would leak the QEMU address space layout.  */
1959        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1960    }
1961
1962    for (i = 0; i < elem->out_num; i++) {
1963        /* Do not save iov_base as above.  */
1964        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1965    }
1966
1967    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1968        qemu_put_be32s(f, &elem->ndescs);
1969    }
1970
1971    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1972}
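
/*
 * Sketch (hypothetical device code, assuming sizeof(VirtQueueElement) is the
 * right allocation size for the device): devices with in-flight requests
 * migrate each popped-but-not-yet-pushed element behind a marker word,
 * mirroring the put/get pair above.
 */
#if 0
static void example_save_inflight(VirtIODevice *vdev, QEMUFile *f,
                                  VirtQueueElement *elem)
{
    qemu_put_be32(f, 1);                       /* "one more element" marker */
    qemu_put_virtqueue_element(vdev, f, elem);
}

static VirtQueueElement *example_load_inflight(VirtIODevice *vdev, QEMUFile *f)
{
    if (!qemu_get_be32(f)) {                   /* end-of-list marker */
        return NULL;
    }
    return qemu_get_virtqueue_element(vdev, f, sizeof(VirtQueueElement));
}
#endif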
1973
1974/* virtio device */
1975static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1976{
1977    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1978    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1979
1980    if (virtio_device_disabled(vdev)) {
1981        return;
1982    }
1983
1984    if (k->notify) {
1985        k->notify(qbus->parent, vector);
1986    }
1987}
1988
1989void virtio_update_irq(VirtIODevice *vdev)
1990{
1991    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1992}
1993
1994static int virtio_validate_features(VirtIODevice *vdev)
1995{
1996    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1997
1998    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1999        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2000        return -EFAULT;
2001    }
2002
2003    if (k->validate_features) {
2004        return k->validate_features(vdev);
2005    } else {
2006        return 0;
2007    }
2008}
2009
2010int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2011{
2012    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2013    trace_virtio_set_status(vdev, val);
2014
2015    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2016        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2017            val & VIRTIO_CONFIG_S_FEATURES_OK) {
2018            int ret = virtio_validate_features(vdev);
2019
2020            if (ret) {
2021                return ret;
2022            }
2023        }
2024    }
2025
2026    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2027        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2028        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2029    }
2030
2031    if (k->set_status) {
2032        k->set_status(vdev, val);
2033    }
2034    vdev->status = val;
2035
2036    return 0;
2037}
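
/*
 * Sketch (hypothetical): a VirtioDeviceClass::set_status hook typically
 * starts or quiesces the backend when the driver toggles DRIVER_OK.  The
 * example_backend_* helpers named here are illustrative.
 */
#if 0
static void example_set_status(VirtIODevice *vdev, uint8_t status)
{
    if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
        example_backend_start(vdev);   /* driver is ready */
    } else {
        example_backend_stop(vdev);    /* device reset, or driver gone */
    }
}
#endif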
2038
2039static enum virtio_device_endian virtio_default_endian(void)
2040{
2041    if (target_words_bigendian()) {
2042        return VIRTIO_DEVICE_ENDIAN_BIG;
2043    } else {
2044        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2045    }
2046}
2047
2048static enum virtio_device_endian virtio_current_cpu_endian(void)
2049{
2050    if (cpu_virtio_is_big_endian(current_cpu)) {
2051        return VIRTIO_DEVICE_ENDIAN_BIG;
2052    } else {
2053        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2054    }
2055}
2056
2057static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2058{
2059    vdev->vq[i].vring.desc = 0;
2060    vdev->vq[i].vring.avail = 0;
2061    vdev->vq[i].vring.used = 0;
2062    vdev->vq[i].last_avail_idx = 0;
2063    vdev->vq[i].shadow_avail_idx = 0;
2064    vdev->vq[i].used_idx = 0;
2065    vdev->vq[i].last_avail_wrap_counter = true;
2066    vdev->vq[i].shadow_avail_wrap_counter = true;
2067    vdev->vq[i].used_wrap_counter = true;
2068    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2069    vdev->vq[i].signalled_used = 0;
2070    vdev->vq[i].signalled_used_valid = false;
2071    vdev->vq[i].notification = true;
2072    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2073    vdev->vq[i].inuse = 0;
2074    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2075}
2076
2077void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2078{
2079    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080
2081    if (k->queue_reset) {
2082        k->queue_reset(vdev, queue_index);
2083    }
2084
2085    __virtio_queue_reset(vdev, queue_index);
2086}
2087
2088void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2089{
2090    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2091
2092    /*
2093     * TODO: SeaBIOS is currently out of spec and triggers this error.
2094     * This needs to be fixed in SeaBIOS first; the check can then
2095     * be re-enabled for new machine types only, and also after
2096     * being converted to LOG_GUEST_ERROR.
2097     *
2098    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2099        error_report("queue_enable is only supported in devices of virtio "
2100                     "1.0 or later.");
2101    }
2102    */
2103
2104    if (k->queue_enable) {
2105        k->queue_enable(vdev, queue_index);
2106    }
2107}
2108
2109void virtio_reset(void *opaque)
2110{
2111    VirtIODevice *vdev = opaque;
2112    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2113    int i;
2114
2115    virtio_set_status(vdev, 0);
2116    if (current_cpu) {
2117        /* Guest initiated reset */
2118        vdev->device_endian = virtio_current_cpu_endian();
2119    } else {
2120        /* System reset */
2121        vdev->device_endian = virtio_default_endian();
2122    }
2123
2124    if (k->reset) {
2125        k->reset(vdev);
2126    }
2127
2128    vdev->start_on_kick = false;
2129    vdev->started = false;
2130    vdev->broken = false;
2131    vdev->guest_features = 0;
2132    vdev->queue_sel = 0;
2133    vdev->status = 0;
2134    vdev->disabled = false;
2135    qatomic_set(&vdev->isr, 0);
2136    vdev->config_vector = VIRTIO_NO_VECTOR;
2137    virtio_notify_vector(vdev, vdev->config_vector);
2138
2139    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2140        __virtio_queue_reset(vdev, i);
2141    }
2142}
2143
2144void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2145{
2146    if (!vdev->vq[n].vring.num) {
2147        return;
2148    }
2149    vdev->vq[n].vring.desc = addr;
2150    virtio_queue_update_rings(vdev, n);
2151}
2152
2153hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2154{
2155    return vdev->vq[n].vring.desc;
2156}
2157
2158void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2159                            hwaddr avail, hwaddr used)
2160{
2161    if (!vdev->vq[n].vring.num) {
2162        return;
2163    }
2164    vdev->vq[n].vring.desc = desc;
2165    vdev->vq[n].vring.avail = avail;
2166    vdev->vq[n].vring.used = used;
2167    virtio_init_region_cache(vdev, n);
2168}
2169
2170void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2171{
2172    /* Don't allow guest to flip queue between existent and
2173     * nonexistent states, or to set it to an invalid size.
2174     */
2175    if (!!num != !!vdev->vq[n].vring.num ||
2176        num > VIRTQUEUE_MAX_SIZE ||
2177        num < 0) {
2178        return;
2179    }
2180    vdev->vq[n].vring.num = num;
2181}
2182
2183VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2184{
2185    return QLIST_FIRST(&vdev->vector_queues[vector]);
2186}
2187
2188VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2189{
2190    return QLIST_NEXT(vq, node);
2191}
2192
2193int virtio_queue_get_num(VirtIODevice *vdev, int n)
2194{
2195    return vdev->vq[n].vring.num;
2196}
2197
2198int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2199{
2200    return vdev->vq[n].vring.num_default;
2201}
2202
2203int virtio_get_num_queues(VirtIODevice *vdev)
2204{
2205    int i;
2206
2207    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2208        if (!virtio_queue_get_num(vdev, i)) {
2209            break;
2210        }
2211    }
2212
2213    return i;
2214}
2215
2216void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2217{
2218    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2219    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2220
2221    /* virtio-1 compliant devices cannot change the alignment */
2222    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2223        error_report("tried to modify queue alignment for virtio-1 device");
2224        return;
2225    }
2226    /* Check that the transport told us it was going to do this
2227     * (so a buggy transport will immediately assert rather than
2228     * silently failing to migrate this state)
2229     */
2230    assert(k->has_variable_vring_alignment);
2231
2232    if (align) {
2233        vdev->vq[n].vring.align = align;
2234        virtio_queue_update_rings(vdev, n);
2235    }
2236}
2237
2238static void virtio_queue_notify_vq(VirtQueue *vq)
2239{
2240    if (vq->vring.desc && vq->handle_output) {
2241        VirtIODevice *vdev = vq->vdev;
2242
2243        if (unlikely(vdev->broken)) {
2244            return;
2245        }
2246
2247        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2248        vq->handle_output(vdev, vq);
2249
2250        if (unlikely(vdev->start_on_kick)) {
2251            virtio_set_started(vdev, true);
2252        }
2253    }
2254}
2255
2256void virtio_queue_notify(VirtIODevice *vdev, int n)
2257{
2258    VirtQueue *vq = &vdev->vq[n];
2259
2260    if (unlikely(!vq->vring.desc || vdev->broken)) {
2261        return;
2262    }
2263
2264    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2265    if (vq->host_notifier_enabled) {
2266        event_notifier_set(&vq->host_notifier);
2267    } else if (vq->handle_output) {
2268        vq->handle_output(vdev, vq);
2269
2270        if (unlikely(vdev->start_on_kick)) {
2271            virtio_set_started(vdev, true);
2272        }
2273    }
2274}
2275
2276uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2277{
2278    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2279        VIRTIO_NO_VECTOR;
2280}
2281
2282void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2283{
2284    VirtQueue *vq = &vdev->vq[n];
2285
2286    if (n < VIRTIO_QUEUE_MAX) {
2287        if (vdev->vector_queues &&
2288            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2289            QLIST_REMOVE(vq, node);
2290        }
2291        vdev->vq[n].vector = vector;
2292        if (vdev->vector_queues &&
2293            vector != VIRTIO_NO_VECTOR) {
2294            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2295        }
2296    }
2297}
2298
2299VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2300                            VirtIOHandleOutput handle_output)
2301{
2302    int i;
2303
2304    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2305        if (vdev->vq[i].vring.num == 0)
2306            break;
2307    }
2308
2309    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2310        abort();
2311
2312    vdev->vq[i].vring.num = queue_size;
2313    vdev->vq[i].vring.num_default = queue_size;
2314    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2315    vdev->vq[i].handle_output = handle_output;
2316    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2317
2318    return &vdev->vq[i];
2319}
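
/*
 * Usage sketch (hypothetical, from a device's realize function): queues are
 * allocated once, in order, and the returned pointers are kept by the
 * device.  The field names, sizes, and handlers below are illustrative.
 *
 *     s->req_vq = virtio_add_queue(vdev, 128, example_handle_request);
 *     s->event_vq = virtio_add_queue(vdev, 16, example_handle_event);
 */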
2320
2321void virtio_delete_queue(VirtQueue *vq)
2322{
2323    vq->vring.num = 0;
2324    vq->vring.num_default = 0;
2325    vq->handle_output = NULL;
2326    g_free(vq->used_elems);
2327    vq->used_elems = NULL;
2328    virtio_virtqueue_reset_region_cache(vq);
2329}
2330
2331void virtio_del_queue(VirtIODevice *vdev, int n)
2332{
2333    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2334        abort();
2335    }
2336
2337    virtio_delete_queue(&vdev->vq[n]);
2338}
2339
2340static void virtio_set_isr(VirtIODevice *vdev, int value)
2341{
2342    uint8_t old = qatomic_read(&vdev->isr);
2343
2344    /* Do not write ISR if it does not change, so that its cacheline remains
2345     * shared in the common case where the guest does not read it.
2346     */
2347    if ((old & value) != value) {
2348        qatomic_or(&vdev->isr, value);
2349    }
2350}
2351
2352/* Called within rcu_read_lock(). */
2353static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2354{
2355    uint16_t old, new;
2356    bool v;
2357    /* We need to expose used array entries before checking used event. */
2358    smp_mb();
2359    /* Always notify when queue is empty (if the feature was acknowledged) */
2360    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2361        !vq->inuse && virtio_queue_empty(vq)) {
2362        return true;
2363    }
2364
2365    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2366        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2367    }
2368
2369    v = vq->signalled_used_valid;
2370    vq->signalled_used_valid = true;
2371    old = vq->signalled_used;
2372    new = vq->signalled_used = vq->used_idx;
2373    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2374}
2375
2376static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2377                                    uint16_t off_wrap, uint16_t new,
2378                                    uint16_t old)
2379{
2380    int off = off_wrap & ~(1 << 15);
2381
2382    if (wrap != off_wrap >> 15) {
2383        off -= vq->vring.num;
2384    }
2385
2386    return vring_need_event(off, new, old);
2387}
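
/*
 * Worked example of the event-index check (split and packed rings share
 * vring_need_event()): with old = 5, new = 8 and event = 6, the driver asked
 * to be woken once entry 6 is used; (uint16_t)(new - event - 1) = 1 is less
 * than (uint16_t)(new - old) = 3, so we notify.  With event = 9 the left
 * side wraps to 65534 and no interrupt is sent.  All arithmetic is modulo
 * 2^16, which keeps the comparison correct across index wrap-around.
 */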
2388
2389/* Called within rcu_read_lock(). */
2390static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2391{
2392    VRingPackedDescEvent e;
2393    uint16_t old, new;
2394    bool v;
2395    VRingMemoryRegionCaches *caches;
2396
2397    caches = vring_get_region_caches(vq);
2398    if (!caches) {
2399        return false;
2400    }
2401
2402    vring_packed_event_read(vdev, &caches->avail, &e);
2403
2404    old = vq->signalled_used;
2405    new = vq->signalled_used = vq->used_idx;
2406    v = vq->signalled_used_valid;
2407    vq->signalled_used_valid = true;
2408
2409    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2410        return false;
2411    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2412        return true;
2413    }
2414
2415    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2416                                         e.off_wrap, new, old);
2417}
2418
2419/* Called within rcu_read_lock().  */
2420static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2421{
2422    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2423        return virtio_packed_should_notify(vdev, vq);
2424    } else {
2425        return virtio_split_should_notify(vdev, vq);
2426    }
2427}
2428
2429void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2430{
2431    WITH_RCU_READ_LOCK_GUARD() {
2432        if (!virtio_should_notify(vdev, vq)) {
2433            return;
2434        }
2435    }
2436
2437    trace_virtio_notify_irqfd(vdev, vq);
2438
2439    /*
2440     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2441     * Windows drivers included in virtio-win 1.8.0 (circa 2015) are
2442     * incorrectly polling this bit during crashdump and hibernation
2443     * in MSI mode, causing a hang if this bit is never updated.
2444     * Recent releases of Windows do not really shut down, but rather
2445     * log out and hibernate to make the next startup faster.  Hence,
2446     * this manifested as a more serious hang during shutdown with
2447     * MSI enabled.
2448     * The next driver release, from 2016, fixed this problem, so working
2449     * around it is not a must, but it's easy to do, so let's do it here.
2450     *
2451     * Note: it's safe to update ISR from any thread as it was switched
2452     * to an atomic operation.
2453     */
2454    virtio_set_isr(vq->vdev, 0x1);
2455    event_notifier_set(&vq->guest_notifier);
2456}
2457
2458static void virtio_irq(VirtQueue *vq)
2459{
2460    virtio_set_isr(vq->vdev, 0x1);
2461    virtio_notify_vector(vq->vdev, vq->vector);
2462}
2463
2464void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2465{
2466    WITH_RCU_READ_LOCK_GUARD() {
2467        if (!virtio_should_notify(vdev, vq)) {
2468            return;
2469        }
2470    }
2471
2472    trace_virtio_notify(vdev, vq);
2473    virtio_irq(vq);
2474}
2475
2476void virtio_notify_config(VirtIODevice *vdev)
2477{
2478    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2479        return;
2480
2481    virtio_set_isr(vdev, 0x3);
2482    vdev->generation++;
2483    virtio_notify_vector(vdev, vdev->config_vector);
2484}
2485
2486static bool virtio_device_endian_needed(void *opaque)
2487{
2488    VirtIODevice *vdev = opaque;
2489
2490    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2491    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2492        return vdev->device_endian != virtio_default_endian();
2493    }
2494    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2495    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2496}
2497
2498static bool virtio_64bit_features_needed(void *opaque)
2499{
2500    VirtIODevice *vdev = opaque;
2501
2502    return (vdev->host_features >> 32) != 0;
2503}
2504
2505static bool virtio_virtqueue_needed(void *opaque)
2506{
2507    VirtIODevice *vdev = opaque;
2508
2509    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2510}
2511
2512static bool virtio_packed_virtqueue_needed(void *opaque)
2513{
2514    VirtIODevice *vdev = opaque;
2515
2516    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2517}
2518
2519static bool virtio_ringsize_needed(void *opaque)
2520{
2521    VirtIODevice *vdev = opaque;
2522    int i;
2523
2524    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2525        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2526            return true;
2527        }
2528    }
2529    return false;
2530}
2531
2532static bool virtio_extra_state_needed(void *opaque)
2533{
2534    VirtIODevice *vdev = opaque;
2535    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2536    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2537
2538    return k->has_extra_state &&
2539        k->has_extra_state(qbus->parent);
2540}
2541
2542static bool virtio_broken_needed(void *opaque)
2543{
2544    VirtIODevice *vdev = opaque;
2545
2546    return vdev->broken;
2547}
2548
2549static bool virtio_started_needed(void *opaque)
2550{
2551    VirtIODevice *vdev = opaque;
2552
2553    return vdev->started;
2554}
2555
2556static bool virtio_disabled_needed(void *opaque)
2557{
2558    VirtIODevice *vdev = opaque;
2559
2560    return vdev->disabled;
2561}
2562
2563static const VMStateDescription vmstate_virtqueue = {
2564    .name = "virtqueue_state",
2565    .version_id = 1,
2566    .minimum_version_id = 1,
2567    .fields = (VMStateField[]) {
2568        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2569        VMSTATE_UINT64(vring.used, struct VirtQueue),
2570        VMSTATE_END_OF_LIST()
2571    }
2572};
2573
2574static const VMStateDescription vmstate_packed_virtqueue = {
2575    .name = "packed_virtqueue_state",
2576    .version_id = 1,
2577    .minimum_version_id = 1,
2578    .fields = (VMStateField[]) {
2579        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2580        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2581        VMSTATE_UINT16(used_idx, struct VirtQueue),
2582        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2583        VMSTATE_UINT32(inuse, struct VirtQueue),
2584        VMSTATE_END_OF_LIST()
2585    }
2586};
2587
2588static const VMStateDescription vmstate_virtio_virtqueues = {
2589    .name = "virtio/virtqueues",
2590    .version_id = 1,
2591    .minimum_version_id = 1,
2592    .needed = &virtio_virtqueue_needed,
2593    .fields = (VMStateField[]) {
2594        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2595                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2596        VMSTATE_END_OF_LIST()
2597    }
2598};
2599
2600static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2601    .name = "virtio/packed_virtqueues",
2602    .version_id = 1,
2603    .minimum_version_id = 1,
2604    .needed = &virtio_packed_virtqueue_needed,
2605    .fields = (VMStateField[]) {
2606        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2607                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2608        VMSTATE_END_OF_LIST()
2609    }
2610};
2611
2612static const VMStateDescription vmstate_ringsize = {
2613    .name = "ringsize_state",
2614    .version_id = 1,
2615    .minimum_version_id = 1,
2616    .fields = (VMStateField[]) {
2617        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2618        VMSTATE_END_OF_LIST()
2619    }
2620};
2621
2622static const VMStateDescription vmstate_virtio_ringsize = {
2623    .name = "virtio/ringsize",
2624    .version_id = 1,
2625    .minimum_version_id = 1,
2626    .needed = &virtio_ringsize_needed,
2627    .fields = (VMStateField[]) {
2628        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2629                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2630        VMSTATE_END_OF_LIST()
2631    }
2632};
2633
2634static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2635                           const VMStateField *field)
2636{
2637    VirtIODevice *vdev = pv;
2638    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2639    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2640
2641    if (!k->load_extra_state) {
2642        return -1;
2643    } else {
2644        return k->load_extra_state(qbus->parent, f);
2645    }
2646}
2647
2648static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2649                           const VMStateField *field, JSONWriter *vmdesc)
2650{
2651    VirtIODevice *vdev = pv;
2652    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2653    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2654
2655    k->save_extra_state(qbus->parent, f);
2656    return 0;
2657}
2658
2659static const VMStateInfo vmstate_info_extra_state = {
2660    .name = "virtqueue_extra_state",
2661    .get = get_extra_state,
2662    .put = put_extra_state,
2663};
2664
2665static const VMStateDescription vmstate_virtio_extra_state = {
2666    .name = "virtio/extra_state",
2667    .version_id = 1,
2668    .minimum_version_id = 1,
2669    .needed = &virtio_extra_state_needed,
2670    .fields = (VMStateField[]) {
2671        {
2672            .name         = "extra_state",
2673            .version_id   = 0,
2674            .field_exists = NULL,
2675            .size         = 0,
2676            .info         = &vmstate_info_extra_state,
2677            .flags        = VMS_SINGLE,
2678            .offset       = 0,
2679        },
2680        VMSTATE_END_OF_LIST()
2681    }
2682};
2683
2684static const VMStateDescription vmstate_virtio_device_endian = {
2685    .name = "virtio/device_endian",
2686    .version_id = 1,
2687    .minimum_version_id = 1,
2688    .needed = &virtio_device_endian_needed,
2689    .fields = (VMStateField[]) {
2690        VMSTATE_UINT8(device_endian, VirtIODevice),
2691        VMSTATE_END_OF_LIST()
2692    }
2693};
2694
2695static const VMStateDescription vmstate_virtio_64bit_features = {
2696    .name = "virtio/64bit_features",
2697    .version_id = 1,
2698    .minimum_version_id = 1,
2699    .needed = &virtio_64bit_features_needed,
2700    .fields = (VMStateField[]) {
2701        VMSTATE_UINT64(guest_features, VirtIODevice),
2702        VMSTATE_END_OF_LIST()
2703    }
2704};
2705
2706static const VMStateDescription vmstate_virtio_broken = {
2707    .name = "virtio/broken",
2708    .version_id = 1,
2709    .minimum_version_id = 1,
2710    .needed = &virtio_broken_needed,
2711    .fields = (VMStateField[]) {
2712        VMSTATE_BOOL(broken, VirtIODevice),
2713        VMSTATE_END_OF_LIST()
2714    }
2715};
2716
2717static const VMStateDescription vmstate_virtio_started = {
2718    .name = "virtio/started",
2719    .version_id = 1,
2720    .minimum_version_id = 1,
2721    .needed = &virtio_started_needed,
2722    .fields = (VMStateField[]) {
2723        VMSTATE_BOOL(started, VirtIODevice),
2724        VMSTATE_END_OF_LIST()
2725    }
2726};
2727
2728static const VMStateDescription vmstate_virtio_disabled = {
2729    .name = "virtio/disabled",
2730    .version_id = 1,
2731    .minimum_version_id = 1,
2732    .needed = &virtio_disabled_needed,
2733    .fields = (VMStateField[]) {
2734        VMSTATE_BOOL(disabled, VirtIODevice),
2735        VMSTATE_END_OF_LIST()
2736    }
2737};
2738
2739static const VMStateDescription vmstate_virtio = {
2740    .name = "virtio",
2741    .version_id = 1,
2742    .minimum_version_id = 1,
2743    .fields = (VMStateField[]) {
2744        VMSTATE_END_OF_LIST()
2745    },
2746    .subsections = (const VMStateDescription*[]) {
2747        &vmstate_virtio_device_endian,
2748        &vmstate_virtio_64bit_features,
2749        &vmstate_virtio_virtqueues,
2750        &vmstate_virtio_ringsize,
2751        &vmstate_virtio_broken,
2752        &vmstate_virtio_extra_state,
2753        &vmstate_virtio_started,
2754        &vmstate_virtio_packed_virtqueues,
2755        &vmstate_virtio_disabled,
2756        NULL
2757    }
2758};
2759
2760int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2761{
2762    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2763    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2764    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2765    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2766    int i;
2767
2768    if (k->save_config) {
2769        k->save_config(qbus->parent, f);
2770    }
2771
2772    qemu_put_8s(f, &vdev->status);
2773    qemu_put_8s(f, &vdev->isr);
2774    qemu_put_be16s(f, &vdev->queue_sel);
2775    qemu_put_be32s(f, &guest_features_lo);
2776    qemu_put_be32(f, vdev->config_len);
2777    qemu_put_buffer(f, vdev->config, vdev->config_len);
2778
2779    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2780        if (vdev->vq[i].vring.num == 0)
2781            break;
2782    }
2783
2784    qemu_put_be32(f, i);
2785
2786    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2787        if (vdev->vq[i].vring.num == 0)
2788            break;
2789
2790        qemu_put_be32(f, vdev->vq[i].vring.num);
2791        if (k->has_variable_vring_alignment) {
2792            qemu_put_be32(f, vdev->vq[i].vring.align);
2793        }
2794        /*
2795         * Save desc now; the rest of the ring addresses are saved in
2796         * subsections for VIRTIO-1 devices.
2797         */
2798        qemu_put_be64(f, vdev->vq[i].vring.desc);
2799        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2800        if (k->save_queue) {
2801            k->save_queue(qbus->parent, i, f);
2802        }
2803    }
2804
2805    if (vdc->save != NULL) {
2806        vdc->save(vdev, f);
2807    }
2808
2809    if (vdc->vmsd) {
2810        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2811        if (ret) {
2812            return ret;
2813        }
2814    }
2815
2816    /* Subsections */
2817    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2818}
2819
2820/* A wrapper for use as a VMState .put function */
2821static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2822                              const VMStateField *field, JSONWriter *vmdesc)
2823{
2824    return virtio_save(VIRTIO_DEVICE(opaque), f);
2825}
2826
2827/* A wrapper for use as a VMState .get function */
2828static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2829                             const VMStateField *field)
2830{
2831    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2832    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2833
2834    return virtio_load(vdev, f, dc->vmsd->version_id);
2835}
2836
2837const VMStateInfo  virtio_vmstate_info = {
2838    .name = "virtio",
2839    .get = virtio_device_get,
2840    .put = virtio_device_put,
2841};
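
/*
 * Sketch (hypothetical device): this VMStateInfo is normally pulled into a
 * device's vmsd through the VMSTATE_VIRTIO_DEVICE field macro from
 * hw/virtio/virtio.h, which expands to a VMS_SINGLE field backed by
 * virtio_vmstate_info.
 */
#if 0
static const VMStateDescription vmstate_example_device = {
    .name = "example-virtio-device",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};
#endif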
2842
2843static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2844{
2845    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2846    bool bad = (val & ~(vdev->host_features)) != 0;
2847
2848    val &= vdev->host_features;
2849    if (k->set_features) {
2850        k->set_features(vdev, val);
2851    }
2852    vdev->guest_features = val;
2853    return bad ? -1 : 0;
2854}
2855
2856int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2857{
2858    int ret;
2859    /*
2860     * The driver must not attempt to set features after feature negotiation
2861     * has finished.
2862     */
2863    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2864        return -EINVAL;
2865    }
2866
2867    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
2868        qemu_log_mask(LOG_GUEST_ERROR,
2869                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
2870                      __func__, vdev->name);
2871    }
2872
2873    ret = virtio_set_features_nocheck(vdev, val);
2874    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2875        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2876        int i;
2877        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2878            if (vdev->vq[i].vring.num != 0) {
2879                virtio_init_region_cache(vdev, i);
2880            }
2881        }
2882    }
2883    if (!ret) {
2884        if (!virtio_device_started(vdev, vdev->status) &&
2885            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2886            vdev->start_on_kick = true;
2887        }
2888    }
2889    return ret;
2890}
2891
2892size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
2893                              uint64_t host_features)
2894{
2895    size_t config_size = params->min_size;
2896    const VirtIOFeature *feature_sizes = params->feature_sizes;
2897    size_t i;
2898
2899    for (i = 0; feature_sizes[i].flags != 0; i++) {
2900        if (host_features & feature_sizes[i].flags) {
2901            config_size = MAX(feature_sizes[i].end, config_size);
2902        }
2903    }
2904
2905    assert(config_size <= params->max_size);
2906    return config_size;
2907}
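
/*
 * Sketch (illustrative; the struct, field, and feature names are
 * hypothetical): a device lists the config fields that only exist when a
 * feature bit is offered, terminated by a zero-flags entry, and lets the
 * helper pick the size.
 */
#if 0
static const VirtIOFeature example_feature_sizes[] = {
    {.flags = 1ULL << EXAMPLE_F_EXTENDED,
     .end = endof(struct example_config, extended_field)},
    {}
};

static const VirtIOConfigSizeParams example_cfg_size_params = {
    .min_size = endof(struct example_config, basic_field),
    .max_size = sizeof(struct example_config),
    .feature_sizes = example_feature_sizes,
};
#endif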
2908
2909int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2910{
2911    int i, ret;
2912    int32_t config_len;
2913    uint32_t num;
2914    uint32_t features;
2915    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2916    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2917    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2918
2919    /*
2920     * We poison the endianness to ensure it does not get used before
2921     * subsections have been loaded.
2922     */
2923    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2924
2925    if (k->load_config) {
2926        ret = k->load_config(qbus->parent, f);
2927        if (ret)
2928            return ret;
2929    }
2930
2931    qemu_get_8s(f, &vdev->status);
2932    qemu_get_8s(f, &vdev->isr);
2933    qemu_get_be16s(f, &vdev->queue_sel);
2934    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2935        return -1;
2936    }
2937    qemu_get_be32s(f, &features);
2938
2939    /*
2940     * Temporarily set guest_features low bits - needed by
2941     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
2942     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2943     *
2944     * Note: devices should always test host features in future - don't create
2945     * new dependencies like this.
2946     */
2947    vdev->guest_features = features;
2948
2949    config_len = qemu_get_be32(f);
2950
2951    /*
2952     * There are cases where the incoming config can be bigger or smaller
2953     * than what we have; so load what we have space for, and skip
2954     * any excess that's in the stream.
2955     */
2956    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2957
2958    while (config_len > vdev->config_len) {
2959        qemu_get_byte(f);
2960        config_len--;
2961    }
2962
2963    num = qemu_get_be32(f);
2964
2965    if (num > VIRTIO_QUEUE_MAX) {
2966        error_report("Invalid number of virtqueues: 0x%x", num);
2967        return -1;
2968    }
2969
2970    for (i = 0; i < num; i++) {
2971        vdev->vq[i].vring.num = qemu_get_be32(f);
2972        if (k->has_variable_vring_alignment) {
2973            vdev->vq[i].vring.align = qemu_get_be32(f);
2974        }
2975        vdev->vq[i].vring.desc = qemu_get_be64(f);
2976        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2977        vdev->vq[i].signalled_used_valid = false;
2978        vdev->vq[i].notification = true;
2979
2980        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2981            error_report("VQ %d address 0x0 "
2982                         "inconsistent with Host index 0x%x",
2983                         i, vdev->vq[i].last_avail_idx);
2984            return -1;
2985        }
2986        if (k->load_queue) {
2987            ret = k->load_queue(qbus->parent, i, f);
2988            if (ret)
2989                return ret;
2990        }
2991    }
2992
2993    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2994
2995    if (vdc->load != NULL) {
2996        ret = vdc->load(vdev, f, version_id);
2997        if (ret) {
2998            return ret;
2999        }
3000    }
3001
3002    if (vdc->vmsd) {
3003        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3004        if (ret) {
3005            return ret;
3006        }
3007    }
3008
3009    /* Subsections */
3010    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3011    if (ret) {
3012        return ret;
3013    }
3014
3015    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3016        vdev->device_endian = virtio_default_endian();
3017    }
3018
3019    if (virtio_64bit_features_needed(vdev)) {
3020        /*
3021         * Subsection load filled vdev->guest_features.  Run the value
3022         * through virtio_set_features_nocheck() to sanity-check it
3023         * against host_features.
3024         */
3025        uint64_t features64 = vdev->guest_features;
3026        if (virtio_set_features_nocheck(vdev, features64) < 0) {
3027            error_report("Features 0x%" PRIx64 " unsupported. "
3028                         "Allowed features: 0x%" PRIx64,
3029                         features64, vdev->host_features);
3030            return -1;
3031        }
3032    } else {
3033        if (virtio_set_features_nocheck(vdev, features) < 0) {
3034            error_report("Features 0x%x unsupported. "
3035                         "Allowed features: 0x%" PRIx64,
3036                         features, vdev->host_features);
3037            return -1;
3038        }
3039    }
3040
3041    if (!virtio_device_started(vdev, vdev->status) &&
3042        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3043        vdev->start_on_kick = true;
3044    }
3045
3046    RCU_READ_LOCK_GUARD();
3047    for (i = 0; i < num; i++) {
3048        if (vdev->vq[i].vring.desc) {
3049            uint16_t nheads;
3050
3051            /*
3052             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3053             * only the region cache needs to be set up.  Legacy devices need
3054             * to calculate used and avail ring addresses based on the desc
3055             * address.
3056             */
3057            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3058                virtio_init_region_cache(vdev, i);
3059            } else {
3060                virtio_queue_update_rings(vdev, i);
3061            }
3062
3063            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3064                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3065                vdev->vq[i].shadow_avail_wrap_counter =
3066                                        vdev->vq[i].last_avail_wrap_counter;
3067                continue;
3068            }
3069
3070            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3071            /* Check it isn't doing strange things with descriptor numbers. */
3072            if (nheads > vdev->vq[i].vring.num) {
3073                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3074                             "inconsistent with Host index 0x%x: delta 0x%x",
3075                             i, vdev->vq[i].vring.num,
3076                             vring_avail_idx(&vdev->vq[i]),
3077                             vdev->vq[i].last_avail_idx, nheads);
3078                vdev->vq[i].used_idx = 0;
3079                vdev->vq[i].shadow_avail_idx = 0;
3080                vdev->vq[i].inuse = 0;
3081                continue;
3082            }
3083            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3084            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3085
3086            /*
3087             * Some devices migrate VirtQueueElements that have been popped
3088             * from the avail ring but not yet returned to the used ring.
3089             * Since the maximum ring size is less than UINT16_MAX, the
3090             * subtraction modulo UINT16_MAX + 1 below is safe.
3091             */
3092            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3093                                vdev->vq[i].used_idx);
3094            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3095                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3096                             "used_idx 0x%x",
3097                             i, vdev->vq[i].vring.num,
3098                             vdev->vq[i].last_avail_idx,
3099                             vdev->vq[i].used_idx);
3100                return -1;
3101            }
3102        }
3103    }
3104
3105    if (vdc->post_load) {
3106        ret = vdc->post_load(vdev);
3107        if (ret) {
3108            return ret;
3109        }
3110    }
3111
3112    return 0;
3113}
3114
3115void virtio_cleanup(VirtIODevice *vdev)
3116{
3117    qemu_del_vm_change_state_handler(vdev->vmstate);
3118}
3119
3120static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3121{
3122    VirtIODevice *vdev = opaque;
3123    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3124    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3125    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3126    vdev->vm_running = running;
3127
3128    if (backend_run) {
3129        virtio_set_status(vdev, vdev->status);
3130    }
3131
3132    if (k->vmstate_change) {
3133        k->vmstate_change(qbus->parent, backend_run);
3134    }
3135
3136    if (!backend_run) {
3137        virtio_set_status(vdev, vdev->status);
3138    }
3139}
3140
3141void virtio_instance_init_common(Object *proxy_obj, void *data,
3142                                 size_t vdev_size, const char *vdev_name)
3143{
3144    DeviceState *vdev = data;
3145
3146    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3147                                       vdev_size, vdev_name, &error_abort,
3148                                       NULL);
3149    qdev_alias_all_properties(vdev, proxy_obj);
3150}
3151
3152void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3153{
3154    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3155    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3156    int i;
3157    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3158
3159    if (nvectors) {
3160        vdev->vector_queues =
3161            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3162    }
3163
3164    vdev->start_on_kick = false;
3165    vdev->started = false;
3166    vdev->vhost_started = false;
3167    vdev->device_id = device_id;
3168    vdev->status = 0;
3169    qatomic_set(&vdev->isr, 0);
3170    vdev->queue_sel = 0;
3171    vdev->config_vector = VIRTIO_NO_VECTOR;
3172    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3173    vdev->vm_running = runstate_is_running();
3174    vdev->broken = false;
3175    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3176        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3177        vdev->vq[i].vdev = vdev;
3178        vdev->vq[i].queue_index = i;
3179        vdev->vq[i].host_notifier_enabled = false;
3180    }
3181
3182    vdev->name = virtio_id_to_name(device_id);
3183    vdev->config_len = config_size;
3184    if (vdev->config_len) {
3185        vdev->config = g_malloc0(config_size);
3186    } else {
3187        vdev->config = NULL;
3188    }
3189    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3190            virtio_vmstate_change, vdev);
3191    vdev->device_endian = virtio_default_endian();
3192    vdev->use_guest_notifier_mask = true;
3193}
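
/*
 * Sketch (hypothetical): the usual realize/unrealize pairing around
 * virtio_init().  The handler and device names are illustrative, and the
 * config size would normally come from virtio_get_config_size().
 */
#if 0
static void example_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    virtio_init(vdev, VIRTIO_ID_NET, sizeof(struct virtio_net_config));
    virtio_add_queue(vdev, 256, example_handle_request);
}

static void example_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    virtio_del_queue(vdev, 0);
    virtio_cleanup(vdev);
}
#endif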
3194
3195/*
3196 * Only devices that were already around before the virtio standard was
3197 * defined support legacy mode; this includes devices not specified in the
3198 * standard. All newer devices conform to the virtio standard only.
3199 */
3200bool virtio_legacy_allowed(VirtIODevice *vdev)
3201{
3202    switch (vdev->device_id) {
3203    case VIRTIO_ID_NET:
3204    case VIRTIO_ID_BLOCK:
3205    case VIRTIO_ID_CONSOLE:
3206    case VIRTIO_ID_RNG:
3207    case VIRTIO_ID_BALLOON:
3208    case VIRTIO_ID_RPMSG:
3209    case VIRTIO_ID_SCSI:
3210    case VIRTIO_ID_9P:
3211    case VIRTIO_ID_RPROC_SERIAL:
3212    case VIRTIO_ID_CAIF:
3213        return true;
3214    default:
3215        return false;
3216    }
3217}
3218
3219bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3220{
3221    return vdev->disable_legacy_check;
3222}
3223
3224hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3225{
3226    return vdev->vq[n].vring.desc;
3227}
3228
3229bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3230{
3231    return virtio_queue_get_desc_addr(vdev, n) != 0;
3232}
3233
3234bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3235{
3236    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3237    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3238
3239    if (k->queue_enabled) {
3240        return k->queue_enabled(qbus->parent, n);
3241    }
3242    return virtio_queue_enabled_legacy(vdev, n);
3243}
3244
3245hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3246{
3247    return vdev->vq[n].vring.avail;
3248}
3249
3250hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3251{
3252    return vdev->vq[n].vring.used;
3253}
3254
3255hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3256{
3257    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3258}
3259
3260hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3261{
3262    int s;
3263
3264    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3265        return sizeof(struct VRingPackedDescEvent);
3266    }
3267
3268    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3269    return offsetof(VRingAvail, ring) +
3270        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3271}
3272
3273hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3274{
3275    int s;
3276
3277    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3278        return sizeof(struct VRingPackedDescEvent);
3279    }
3280
3281    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3282    return offsetof(VRingUsed, ring) +
3283        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3284}
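
/*
 * Worked example: for a split ring with num = 256 and EVENT_IDX negotiated,
 * the avail ring takes 4 + 2 * 256 + 2 = 518 bytes (flags, idx, ring[],
 * used_event) and the used ring 4 + 8 * 256 + 2 = 2054 bytes.  A packed
 * ring replaces each with a 4-byte VRingPackedDescEvent instead.
 */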
3285
3286static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3287                                                           int n)
3288{
3289    unsigned int avail, used;
3290
3291    avail = vdev->vq[n].last_avail_idx;
3292    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3293
3294    used = vdev->vq[n].used_idx;
3295    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3296
3297    return avail | used << 16;
3298}
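
/*
 * Encoding example: last_avail_idx = 5 with its wrap counter set and
 * used_idx = 3 with its wrap counter clear yields
 * (5 | 0x8000) | (3 << 16) = 0x00038005; the setter below reverses this.
 */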
3299
3300static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3301                                                      int n)
3302{
3303    return vdev->vq[n].last_avail_idx;
3304}
3305
3306unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3307{
3308    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3309        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3310    } else {
3311        return virtio_queue_split_get_last_avail_idx(vdev, n);
3312    }
3313}
3314
3315static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3316                                                   int n, unsigned int idx)
3317{
3318    struct VirtQueue *vq = &vdev->vq[n];
3319
3320    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3321    vq->last_avail_wrap_counter =
3322        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3323    idx >>= 16;
3324    vq->used_idx = idx & 0x7fff;
3325    vq->used_wrap_counter = !!(idx & 0x8000);
3326}
3327
3328static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3329                                                  int n, unsigned int idx)
3330{
3331    vdev->vq[n].last_avail_idx = idx;
3332    vdev->vq[n].shadow_avail_idx = idx;
3333}
3334
3335void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3336                                     unsigned int idx)
3337{
3338    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3339        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3340    } else {
3341        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3342    }
3343}
3344
3345static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3346                                                       int n)
3347{
3348    /* There is no index in shared memory to restore last_avail_idx from */
3349    return;
3350}
3351
3352static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3353                                                      int n)
3354{
3355    RCU_READ_LOCK_GUARD();
3356    if (vdev->vq[n].vring.desc) {
3357        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3358        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3359    }
3360}
3361
3362void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3363{
3364    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3365        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3366    } else {
3367        virtio_queue_split_restore_last_avail_idx(vdev, n);
3368    }
3369}
3370
3371static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3372{
3373    /* used idx was updated through set_last_avail_idx() */
3374    return;
3375}
3376
3377static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3378{
3379    RCU_READ_LOCK_GUARD();
3380    if (vdev->vq[n].vring.desc) {
3381        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3382    }
3383}
3384
3385void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3386{
3387    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3388        return virtio_queue_packed_update_used_idx(vdev, n);
3389    } else {
3390        return virtio_queue_split_update_used_idx(vdev, n);
3391    }
3392}
3393
3394void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3395{
3396    vdev->vq[n].signalled_used_valid = false;
3397}
3398
3399VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3400{
3401    return vdev->vq + n;
3402}
3403
3404uint16_t virtio_get_queue_index(VirtQueue *vq)
3405{
3406    return vq->queue_index;
3407}
3408
3409static void virtio_queue_guest_notifier_read(EventNotifier *n)
3410{
3411    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3412    if (event_notifier_test_and_clear(n)) {
3413        virtio_irq(vq);
3414    }
3415}
3416static void virtio_config_guest_notifier_read(EventNotifier *n)
3417{
3418    VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3419
3420    if (event_notifier_test_and_clear(n)) {
3421        virtio_notify_config(vdev);
3422    }
3423}

3424void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3425                                                bool with_irqfd)
3426{
3427    if (assign && !with_irqfd) {
3428        event_notifier_set_handler(&vq->guest_notifier,
3429                                   virtio_queue_guest_notifier_read);
3430    } else {
3431        event_notifier_set_handler(&vq->guest_notifier, NULL);
3432    }
3433    if (!assign) {
3434        /* Test and clear notifier before closing it,
3435         * in case poll callback didn't have time to run. */
3436        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3437    }
3438}
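
/*
 * When with_irqfd is true the guest notifier is consumed in the kernel by
 * KVM's irqfd mechanism, which injects the interrupt directly into the
 * guest; a userspace read handler would be redundant, which is why one is
 * installed only for the assign-without-irqfd case.
 */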
3439
3440void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3441                                                 bool assign, bool with_irqfd)
3442{
3443    EventNotifier *n;
3444    n = &vdev->config_notifier;
3445    if (assign && !with_irqfd) {
3446        event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3447    } else {
3448        event_notifier_set_handler(n, NULL);
3449    }
3450    if (!assign) {
3451        /* Test and clear notifier before closing it,
3452         * in case poll callback didn't have time to run. */
3453        virtio_config_guest_notifier_read(n);
3454    }
3455}
3456
3457EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3458{
3459    return &vq->guest_notifier;
3460}
3461
3462static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3463{
3464    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3465
3466    virtio_queue_set_notification(vq, 0);
3467}
3468
3469static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3470{
3471    EventNotifier *n = opaque;
3472    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3473
3474    return vq->vring.desc && !virtio_queue_empty(vq);
3475}
3476
3477static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3478{
3479    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3480
3481    virtio_queue_notify_vq(vq);
3482}
3483
3484static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3485{
3486    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3487
3488    /* Caller polls once more after this to catch requests that race with us */
3489    virtio_queue_set_notification(vq, 1);
3490}
3491
3492void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3493{
3494    aio_set_event_notifier(ctx, &vq->host_notifier, true,
3495                           virtio_queue_host_notifier_read,
3496                           virtio_queue_host_notifier_aio_poll,
3497                           virtio_queue_host_notifier_aio_poll_ready);
3498    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3499                                virtio_queue_host_notifier_aio_poll_begin,
3500                                virtio_queue_host_notifier_aio_poll_end);
3501}
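
/*
 * The callbacks registered above implement adaptive polling: poll_begin
 * suppresses guest->host notifications so the guest can skip the
 * relatively expensive notification write while the event loop is
 * busy-polling, the poll callback itself merely checks whether the ring
 * has work pending, and poll_end re-enables notifications before the
 * event loop goes back to waiting on the notifier.
 */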
3502
3503/*
3504 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3505 * this for rx virtqueues and similar cases where the virtqueue handler
3506 * function does not pop all elements. When the virtqueue is left non-empty
3507 * polling consumes CPU cycles and should not be used.
3508 */
3509void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3510{
3511    aio_set_event_notifier(ctx, &vq->host_notifier, true,
3512                           virtio_queue_host_notifier_read,
3513                           NULL, NULL);
3514}
3515
3516void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3517{
3518    aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
3519    /* Test and clear notifier after disabling event,
3520     * in case poll callback didn't have time to run. */
3521    virtio_queue_host_notifier_read(&vq->host_notifier);
3522}
3523
3524void virtio_queue_host_notifier_read(EventNotifier *n)
3525{
3526    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3527    if (event_notifier_test_and_clear(n)) {
3528        virtio_queue_notify_vq(vq);
3529    }
3530}
3531
3532EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3533{
3534    return &vq->host_notifier;
3535}
3536
3537EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3538{
3539    return &vdev->config_notifier;
3540}
3541
3542void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3543{
3544    vq->host_notifier_enabled = enabled;
3545}
3546
3547int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3548                                      MemoryRegion *mr, bool assign)
3549{
3550    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3551    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3552
3553    if (k->set_host_notifier_mr) {
3554        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3555    }
3556
3557    return -1;
3558}
3559
3560void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3561{
3562    g_free(vdev->bus_name);
3563    vdev->bus_name = g_strdup(bus_name);
3564}
3565
3566void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3567{
3568    va_list ap;
3569
3570    va_start(ap, fmt);
3571    error_vreport(fmt, ap);
3572    va_end(ap);
3573
3574    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3575        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3576        virtio_notify_config(vdev);
3577    }
3578
3579    vdev->broken = true;
3580}
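
/*
 * virtio_error() marks the device broken and, for VIRTIO 1.0 devices,
 * raises NEEDS_RESET toward the guest.  A hypothetical call site in a
 * device model would look something like:
 *
 *     if (elem_len > max_len) {
 *         virtio_error(vdev, "element of %zu bytes exceeds %zu",
 *                      elem_len, max_len);
 *         return;  // the queue must not be processed any further
 *     }
 *
 * Legacy (pre-1.0) devices have no NEEDS_RESET bit, so for them the error
 * is only reported on the host side.
 */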
3581
3582static void virtio_memory_listener_commit(MemoryListener *listener)
3583{
3584    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3585    int i;
3586
3587    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3588        if (vdev->vq[i].vring.num == 0) {
3589            break;
3590        }
3591        virtio_init_region_cache(vdev, i);
3592    }
3593}
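
/*
 * The commit hook runs after every guest memory map change; the ring
 * region caches hold translations into that map, so they are rebuilt for
 * each active queue to avoid using stale mappings.
 */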
3594
3595static void virtio_device_realize(DeviceState *dev, Error **errp)
3596{
3597    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3598    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3599    Error *err = NULL;
3600
3601    /* Devices should either use vmsd or the load/save methods */
3602    assert(!vdc->vmsd || !vdc->load);
3603
3604    if (vdc->realize != NULL) {
3605        vdc->realize(dev, &err);
3606        if (err != NULL) {
3607            error_propagate(errp, err);
3608            return;
3609        }
3610    }
3611
3612    virtio_bus_device_plugged(vdev, &err);
3613    if (err != NULL) {
3614        error_propagate(errp, err);
3615        vdc->unrealize(dev);
3616        return;
3617    }
3618
3619    vdev->listener.commit = virtio_memory_listener_commit;
3620    vdev->listener.name = "virtio";
3621    memory_listener_register(&vdev->listener, vdev->dma_as);
3622    QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
3623}
3624
3625static void virtio_device_unrealize(DeviceState *dev)
3626{
3627    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3628    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3629
3630    memory_listener_unregister(&vdev->listener);
3631    virtio_bus_device_unplugged(vdev);
3632
3633    if (vdc->unrealize != NULL) {
3634        vdc->unrealize(dev);
3635    }
3636
3637    QTAILQ_REMOVE(&virtio_list, vdev, next);
3638    g_free(vdev->bus_name);
3639    vdev->bus_name = NULL;
3640}
3641
3642static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3643{
3644    int i;
3645    if (!vdev->vq) {
3646        return;
3647    }
3648
3649    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3650        if (vdev->vq[i].vring.num == 0) {
3651            break;
3652        }
3653        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3654    }
3655    g_free(vdev->vq);
3656}
3657
3658static void virtio_device_instance_finalize(Object *obj)
3659{
3660    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3661
3662    virtio_device_free_virtqueues(vdev);
3663
3664    g_free(vdev->config);
3665    g_free(vdev->vector_queues);
3666}
3667
3668static Property virtio_properties[] = {
3669    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3670    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3671    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3672    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3673                     disable_legacy_check, false),
3674    DEFINE_PROP_END_OF_LIST(),
3675};
3676
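/*
 * Wire up ioeventfd for every active queue within a single memory
 * transaction.  On failure the unwind happens in two passes: handlers and
 * host notifiers are torn down for the already-assigned queues before the
 * transaction commits, and the notifier fds are cleaned up only after the
 * commit, which expects them to still be open.
 */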
3677static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3678{
3679    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3680    int i, n, r, err;
3681
3682    /*
3683     * Batch all the host notifiers in a single transaction to avoid
3684     * quadratic time complexity in address_space_update_ioeventfds().
3685     */
3686    memory_region_transaction_begin();
3687    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3688        VirtQueue *vq = &vdev->vq[n];
3689        if (!virtio_queue_get_num(vdev, n)) {
3690            continue;
3691        }
3692        r = virtio_bus_set_host_notifier(qbus, n, true);
3693        if (r < 0) {
3694            err = r;
3695            goto assign_error;
3696        }
3697        event_notifier_set_handler(&vq->host_notifier,
3698                                   virtio_queue_host_notifier_read);
3699    }
3700
3701    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3702        /* Kick right away to begin processing requests already in vring */
3703        VirtQueue *vq = &vdev->vq[n];
3704        if (!vq->vring.num) {
3705            continue;
3706        }
3707        event_notifier_set(&vq->host_notifier);
3708    }
3709    memory_region_transaction_commit();
3710    return 0;
3711
3712assign_error:
3713    i = n; /* save n for a second iteration after transaction is committed. */
3714    while (--n >= 0) {
3715        VirtQueue *vq = &vdev->vq[n];
3716        if (!virtio_queue_get_num(vdev, n)) {
3717            continue;
3718        }
3719
3720        event_notifier_set_handler(&vq->host_notifier, NULL);
3721        r = virtio_bus_set_host_notifier(qbus, n, false);
3722        assert(r >= 0);
3723    }
3724    /*
3725     * The transaction expects the ioeventfds to be open when it
3726     * commits. Do it now, before the cleanup loop.
3727     */
3728    memory_region_transaction_commit();
3729
3730    while (--i >= 0) {
3731        if (!virtio_queue_get_num(vdev, i)) {
3732            continue;
3733        }
3734        virtio_bus_cleanup_host_notifier(qbus, i);
3735    }
3736    return err;
3737}
3738
3739int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3740{
3741    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3742    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3743
3744    return virtio_bus_start_ioeventfd(vbus);
3745}
3746
3747static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3748{
3749    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3750    int n, r;
3751
3752    /*
3753     * Batch all the host notifiers in a single transaction to avoid
3754     * quadratic time complexity in address_space_update_ioeventfds().
3755     */
3756    memory_region_transaction_begin();
3757    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3758        VirtQueue *vq = &vdev->vq[n];
3759
3760        if (!virtio_queue_get_num(vdev, n)) {
3761            continue;
3762        }
3763        event_notifier_set_handler(&vq->host_notifier, NULL);
3764        r = virtio_bus_set_host_notifier(qbus, n, false);
3765        assert(r >= 0);
3766    }
3767    /*
3768     * The transaction expects the ioeventfds to be open when it
3769     * commits. Do it now, before the cleanup loop.
3770     */
3771    memory_region_transaction_commit();
3772
3773    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3774        if (!virtio_queue_get_num(vdev, n)) {
3775            continue;
3776        }
3777        virtio_bus_cleanup_host_notifier(qbus, n);
3778    }
3779}
3780
3781int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3782{
3783    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3784    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3785
3786    return virtio_bus_grab_ioeventfd(vbus);
3787}
3788
3789void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3790{
3791    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3792    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3793
3794    virtio_bus_release_ioeventfd(vbus);
3795}
3796
3797static void virtio_device_class_init(ObjectClass *klass, void *data)
3798{
3799    /* Set the default value here. */
3800    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3801    DeviceClass *dc = DEVICE_CLASS(klass);
3802
3803    dc->realize = virtio_device_realize;
3804    dc->unrealize = virtio_device_unrealize;
3805    dc->bus_type = TYPE_VIRTIO_BUS;
3806    device_class_set_props(dc, virtio_properties);
3807    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3808    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3809
3810    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3811
3812    QTAILQ_INIT(&virtio_list);
3813}
3814
3815bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3816{
3817    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3818    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3819
3820    return virtio_bus_ioeventfd_enabled(vbus);
3821}
3822
3823VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
3824                                                 uint16_t queue,
3825                                                 Error **errp)
3826{
3827    VirtIODevice *vdev;
3828    VirtQueueStatus *status;
3829
3830    vdev = qmp_find_virtio_device(path);
3831    if (vdev == NULL) {
3832        error_setg(errp, "Path %s is not a VirtIO device", path);
3833        return NULL;
3834    }
3835
3836    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3837        error_setg(errp, "Invalid virtqueue number %d", queue);
3838        return NULL;
3839    }
3840
3841    status = g_new0(VirtQueueStatus, 1);
3842    status->name = g_strdup(vdev->name);
3843    status->queue_index = vdev->vq[queue].queue_index;
3844    status->inuse = vdev->vq[queue].inuse;
3845    status->vring_num = vdev->vq[queue].vring.num;
3846    status->vring_num_default = vdev->vq[queue].vring.num_default;
3847    status->vring_align = vdev->vq[queue].vring.align;
3848    status->vring_desc = vdev->vq[queue].vring.desc;
3849    status->vring_avail = vdev->vq[queue].vring.avail;
3850    status->vring_used = vdev->vq[queue].vring.used;
3851    status->used_idx = vdev->vq[queue].used_idx;
3852    status->signalled_used = vdev->vq[queue].signalled_used;
3853    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
3854
3855    if (vdev->vhost_started) {
3856        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3857        struct vhost_dev *hdev = vdc->get_vhost(vdev);
3858
3859        /* Check that the vq index is valid for the vhost backend as well */
3860        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
3861            status->has_last_avail_idx = true;
3862
3863            int vhost_vq_index =
3864                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
3865            struct vhost_vring_state state = {
3866                .index = vhost_vq_index,
3867            };
3868
3869            status->last_avail_idx =
3870                hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
3871        }
3872    } else {
3873        status->has_shadow_avail_idx = true;
3874        status->has_last_avail_idx = true;
3875        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
3876        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
3877    }
3878
3879    return status;
3880}
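
/*
 * Example QMP invocation for the command backed by the function above
 * (the command is experimental, hence the x- prefix; the path shown is
 * illustrative):
 *
 *   { "execute": "x-query-virtio-queue-status",
 *     "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                    "queue": 0 } }
 */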
3881
3882static strList *qmp_decode_vring_desc_flags(uint16_t flags)
3883{
3884    strList *list = NULL;
3885    strList *node;
3886    int i;
3887
3888    struct {
3889        uint16_t flag;
3890        const char *value;
3891    } map[] = {
3892        { VRING_DESC_F_NEXT, "next" },
3893        { VRING_DESC_F_WRITE, "write" },
3894        { VRING_DESC_F_INDIRECT, "indirect" },
3895        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
3896        { 1 << VRING_PACKED_DESC_F_USED, "used" },
3897        { 0, "" }
3898    };
3899
3900    for (i = 0; map[i].flag; i++) {
3901        if ((map[i].flag & flags) == 0) {
3902            continue;
3903        }
3904        node = g_malloc0(sizeof(strList));
3905        node->value = g_strdup(map[i].value);
3906        node->next = list;
3907        list = node;
3908    }
3909
3910    return list;
3911}
3912
3913VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
3914                                                     uint16_t queue,
3915                                                     bool has_index,
3916                                                     uint16_t index,
3917                                                     Error **errp)
3918{
3919    VirtIODevice *vdev;
3920    VirtQueue *vq;
3921    VirtioQueueElement *element = NULL;
3922
3923    vdev = qmp_find_virtio_device(path);
3924    if (vdev == NULL) {
3925        error_setg(errp, "Path %s is not a VirtIO device", path);
3926        return NULL;
3927    }
3928
3929    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3930        error_setg(errp, "Invalid virtqueue number %d", queue);
3931        return NULL;
3932    }
3933    vq = &vdev->vq[queue];
3934
3935    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3936        error_setg(errp, "Packed ring not supported");
3937        return NULL;
3938    } else {
3939        unsigned int head, i, max;
3940        VRingMemoryRegionCaches *caches;
3941        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
3942        MemoryRegionCache *desc_cache;
3943        VRingDesc desc;
3944        VirtioRingDescList *list = NULL;
3945        VirtioRingDescList *node;
3946        int rc, ndescs;
3947
3948        RCU_READ_LOCK_GUARD();
3949
3950        max = vq->vring.num;
3951
3952        if (!has_index) {
3953            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
3954        } else {
3955            head = vring_avail_ring(vq, index % vq->vring.num);
3956        }
3957        i = head;
3958
3959        caches = vring_get_region_caches(vq);
3960        if (!caches) {
3961            error_setg(errp, "Region caches not initialized");
3962            return NULL;
3963        }
3964        if (caches->desc.len < max * sizeof(VRingDesc)) {
3965            error_setg(errp, "Cannot map descriptor ring");
3966            return NULL;
3967        }
3968
3969        desc_cache = &caches->desc;
3970        vring_split_desc_read(vdev, &desc, desc_cache, i);
3971        if (desc.flags & VRING_DESC_F_INDIRECT) {
3972            int64_t len;
3973            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
3974                                           desc.addr, desc.len, false);
3975            desc_cache = &indirect_desc_cache;
3976            if (len < desc.len) {
3977                error_setg(errp, "Cannot map indirect buffer");
3978                goto done;
3979            }
3980
3981            max = desc.len / sizeof(VRingDesc);
3982            i = 0;
3983            vring_split_desc_read(vdev, &desc, desc_cache, i);
3984        }
3985
3986        element = g_new0(VirtioQueueElement, 1);
3987        element->avail = g_new0(VirtioRingAvail, 1);
3988        element->used = g_new0(VirtioRingUsed, 1);
3989        element->name = g_strdup(vdev->name);
3990        element->index = head;
3991        element->avail->flags = vring_avail_flags(vq);
3992        element->avail->idx = vring_avail_idx(vq);
3993        element->avail->ring = head;
3994        element->used->flags = vring_used_flags(vq);
3995        element->used->idx = vring_used_idx(vq);
3996        ndescs = 0;
3997
3998        do {
3999            /* A buggy driver may produce an infinite loop */
4000            if (ndescs >= max) {
4001                break;
4002            }
4003            node = g_new0(VirtioRingDescList, 1);
4004            node->value = g_new0(VirtioRingDesc, 1);
4005            node->value->addr = desc.addr;
4006            node->value->len = desc.len;
4007            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4008            node->next = list;
4009            list = node;
4010
4011            ndescs++;
4012            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4013                                                max, &i);
4014        } while (rc == VIRTQUEUE_READ_DESC_MORE);
4015        element->descs = list;
4016done:
4017        address_space_cache_destroy(&indirect_desc_cache);
4018    }
4019
4020    return element;
4021}
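
/*
 * Example QMP invocation for the command backed by the function above;
 * "index" is optional and defaults to the queue's current last_avail_idx
 * (the path shown is illustrative):
 *
 *   { "execute": "x-query-virtio-queue-element",
 *     "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                    "queue": 1 } }
 */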
4022
4023static const TypeInfo virtio_device_info = {
4024    .name = TYPE_VIRTIO_DEVICE,
4025    .parent = TYPE_DEVICE,
4026    .instance_size = sizeof(VirtIODevice),
4027    .class_init = virtio_device_class_init,
4028    .instance_finalize = virtio_device_instance_finalize,
4029    .abstract = true,
4030    .class_size = sizeof(VirtioDeviceClass),
4031};
4032
4033static void virtio_register_types(void)
4034{
4035    type_register_static(&virtio_device_info);
4036}
4037
4038type_init(virtio_register_types)
4039