qemu/hw/virtio/virtio.c
   1/*
   2 * Virtio Support
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qapi/error.h"
  16#include "qapi/qapi-commands-virtio.h"
  17#include "trace.h"
  18#include "qemu/error-report.h"
  19#include "qemu/log.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/module.h"
  22#include "qom/object_interfaces.h"
  23#include "hw/core/cpu.h"
  24#include "hw/virtio/virtio.h"
  25#include "hw/virtio/vhost.h"
  26#include "migration/qemu-file-types.h"
  27#include "qemu/atomic.h"
  28#include "hw/virtio/virtio-bus.h"
  29#include "hw/qdev-properties.h"
  30#include "hw/virtio/virtio-access.h"
  31#include "sysemu/dma.h"
  32#include "sysemu/runstate.h"
  33#include "virtio-qmp.h"
  34
  35#include "standard-headers/linux/virtio_ids.h"
  36#include "standard-headers/linux/vhost_types.h"
  37#include "standard-headers/linux/virtio_blk.h"
  38#include "standard-headers/linux/virtio_console.h"
  39#include "standard-headers/linux/virtio_gpu.h"
  40#include "standard-headers/linux/virtio_net.h"
  41#include "standard-headers/linux/virtio_scsi.h"
  42#include "standard-headers/linux/virtio_i2c.h"
  43#include "standard-headers/linux/virtio_balloon.h"
  44#include "standard-headers/linux/virtio_iommu.h"
  45#include "standard-headers/linux/virtio_mem.h"
  46#include "standard-headers/linux/virtio_vsock.h"
  47
  48QmpVirtIODeviceList virtio_list;
  49
  50/*
  51 * Maximum size of virtio device config space
  52 */
  53#define VHOST_USER_MAX_CONFIG_SIZE 256
  54
  55/*
  56 * The alignment to use between consumer and producer parts of vring.
   57 * This is the x86 page size. It is the default, used by transports like PCI
  58 * which don't provide a means for the guest to tell the host the alignment.
  59 */
  60#define VIRTIO_PCI_VRING_ALIGN         4096
  61
  62typedef struct VRingDesc
  63{
  64    uint64_t addr;
  65    uint32_t len;
  66    uint16_t flags;
  67    uint16_t next;
  68} VRingDesc;
  69
  70typedef struct VRingPackedDesc {
  71    uint64_t addr;
  72    uint32_t len;
  73    uint16_t id;
  74    uint16_t flags;
  75} VRingPackedDesc;
  76
  77typedef struct VRingAvail
  78{
  79    uint16_t flags;
  80    uint16_t idx;
  81    uint16_t ring[];
  82} VRingAvail;
  83
  84typedef struct VRingUsedElem
  85{
  86    uint32_t id;
  87    uint32_t len;
  88} VRingUsedElem;
  89
  90typedef struct VRingUsed
  91{
  92    uint16_t flags;
  93    uint16_t idx;
  94    VRingUsedElem ring[];
  95} VRingUsed;
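
/*
 * These structs mirror the guest-visible layout of a split virtqueue as
 * defined by the virtio specification: a descriptor table of 16-byte
 * VRingDesc entries, the driver-owned avail ring (4-byte header plus one
 * 16-bit entry per descriptor), and the device-owned used ring (4-byte
 * header plus one 8-byte VRingUsedElem per descriptor).  When
 * VIRTIO_RING_F_EVENT_IDX is negotiated, one extra 16-bit event word
 * follows each ring.  The structs themselves are never mapped directly;
 * all guest memory accesses below go through the MemoryRegionCache
 * helpers and byte-swap for the guest's endianness as needed.
 */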
  96
  97typedef struct VRingMemoryRegionCaches {
  98    struct rcu_head rcu;
  99    MemoryRegionCache desc;
 100    MemoryRegionCache avail;
 101    MemoryRegionCache used;
 102} VRingMemoryRegionCaches;
 103
 104typedef struct VRing
 105{
 106    unsigned int num;
 107    unsigned int num_default;
 108    unsigned int align;
 109    hwaddr desc;
 110    hwaddr avail;
 111    hwaddr used;
 112    VRingMemoryRegionCaches *caches;
 113} VRing;
 114
 115typedef struct VRingPackedDescEvent {
 116    uint16_t off_wrap;
 117    uint16_t flags;
  118} VRingPackedDescEvent;
 119
 120struct VirtQueue
 121{
 122    VRing vring;
 123    VirtQueueElement *used_elems;
 124
 125    /* Next head to pop */
 126    uint16_t last_avail_idx;
 127    bool last_avail_wrap_counter;
 128
 129    /* Last avail_idx read from VQ. */
 130    uint16_t shadow_avail_idx;
 131    bool shadow_avail_wrap_counter;
 132
 133    uint16_t used_idx;
 134    bool used_wrap_counter;
 135
 136    /* Last used index value we have signalled on */
 137    uint16_t signalled_used;
 138
  139    /* Whether signalled_used is valid */
 140    bool signalled_used_valid;
 141
 142    /* Notification enabled? */
 143    bool notification;
 144
 145    uint16_t queue_index;
 146
 147    unsigned int inuse;
 148
 149    uint16_t vector;
 150    VirtIOHandleOutput handle_output;
 151    VirtIODevice *vdev;
 152    EventNotifier guest_notifier;
 153    EventNotifier host_notifier;
 154    bool host_notifier_enabled;
 155    QLIST_ENTRY(VirtQueue) node;
 156};
 157
 158const char *virtio_device_names[] = {
 159    [VIRTIO_ID_NET] = "virtio-net",
 160    [VIRTIO_ID_BLOCK] = "virtio-blk",
 161    [VIRTIO_ID_CONSOLE] = "virtio-serial",
 162    [VIRTIO_ID_RNG] = "virtio-rng",
 163    [VIRTIO_ID_BALLOON] = "virtio-balloon",
 164    [VIRTIO_ID_IOMEM] = "virtio-iomem",
 165    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
 166    [VIRTIO_ID_SCSI] = "virtio-scsi",
 167    [VIRTIO_ID_9P] = "virtio-9p",
 168    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
 169    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
 170    [VIRTIO_ID_CAIF] = "virtio-caif",
 171    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
 172    [VIRTIO_ID_GPU] = "virtio-gpu",
 173    [VIRTIO_ID_CLOCK] = "virtio-clk",
 174    [VIRTIO_ID_INPUT] = "virtio-input",
 175    [VIRTIO_ID_VSOCK] = "vhost-vsock",
 176    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
 177    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
 178    [VIRTIO_ID_PSTORE] = "virtio-pstore",
 179    [VIRTIO_ID_IOMMU] = "virtio-iommu",
 180    [VIRTIO_ID_MEM] = "virtio-mem",
 181    [VIRTIO_ID_SOUND] = "virtio-sound",
 182    [VIRTIO_ID_FS] = "virtio-user-fs",
 183    [VIRTIO_ID_PMEM] = "virtio-pmem",
 184    [VIRTIO_ID_RPMB] = "virtio-rpmb",
 185    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
 186    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
 187    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
 188    [VIRTIO_ID_SCMI] = "virtio-scmi",
 189    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
 190    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
 191    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
 192    [VIRTIO_ID_CAN] = "virtio-can",
 193    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
 194    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
 195    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
 196    [VIRTIO_ID_BT] = "virtio-bluetooth",
 197    [VIRTIO_ID_GPIO] = "virtio-gpio"
 198};
 199
 200static const char *virtio_id_to_name(uint16_t device_id)
 201{
 202    assert(device_id < G_N_ELEMENTS(virtio_device_names));
 203    const char *name = virtio_device_names[device_id];
 204    assert(name != NULL);
 205    return name;
 206}
 207
 208/* Called within call_rcu().  */
 209static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 210{
 211    assert(caches != NULL);
 212    address_space_cache_destroy(&caches->desc);
 213    address_space_cache_destroy(&caches->avail);
 214    address_space_cache_destroy(&caches->used);
 215    g_free(caches);
 216}
 217
 218static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
 219{
 220    VRingMemoryRegionCaches *caches;
 221
 222    caches = qatomic_read(&vq->vring.caches);
 223    qatomic_rcu_set(&vq->vring.caches, NULL);
 224    if (caches) {
 225        call_rcu(caches, virtio_free_region_cache, rcu);
 226    }
 227}
 228
 229void virtio_init_region_cache(VirtIODevice *vdev, int n)
 230{
 231    VirtQueue *vq = &vdev->vq[n];
 232    VRingMemoryRegionCaches *old = vq->vring.caches;
 233    VRingMemoryRegionCaches *new = NULL;
 234    hwaddr addr, size;
 235    int64_t len;
 236    bool packed;
 237
 238
 239    addr = vq->vring.desc;
 240    if (!addr) {
 241        goto out_no_cache;
 242    }
 243    new = g_new0(VRingMemoryRegionCaches, 1);
 244    size = virtio_queue_get_desc_size(vdev, n);
  245    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
 247    len = address_space_cache_init(&new->desc, vdev->dma_as,
 248                                   addr, size, packed);
 249    if (len < size) {
 250        virtio_error(vdev, "Cannot map desc");
 251        goto err_desc;
 252    }
 253
 254    size = virtio_queue_get_used_size(vdev, n);
 255    len = address_space_cache_init(&new->used, vdev->dma_as,
 256                                   vq->vring.used, size, true);
 257    if (len < size) {
 258        virtio_error(vdev, "Cannot map used");
 259        goto err_used;
 260    }
 261
 262    size = virtio_queue_get_avail_size(vdev, n);
 263    len = address_space_cache_init(&new->avail, vdev->dma_as,
 264                                   vq->vring.avail, size, false);
 265    if (len < size) {
 266        virtio_error(vdev, "Cannot map avail");
 267        goto err_avail;
 268    }
 269
 270    qatomic_rcu_set(&vq->vring.caches, new);
 271    if (old) {
 272        call_rcu(old, virtio_free_region_cache, rcu);
 273    }
 274    return;
 275
 276err_avail:
 277    address_space_cache_destroy(&new->avail);
 278err_used:
 279    address_space_cache_destroy(&new->used);
 280err_desc:
 281    address_space_cache_destroy(&new->desc);
 282out_no_cache:
 283    g_free(new);
 284    virtio_virtqueue_reset_region_cache(vq);
 285}
 286
 287/* virt queue functions */
 288void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 289{
 290    VRing *vring = &vdev->vq[n].vring;
 291
 292    if (!vring->num || !vring->desc || !vring->align) {
 293        /* not yet setup -> nothing to do */
 294        return;
 295    }
 296    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
 297    vring->used = vring_align(vring->avail +
 298                              offsetof(VRingAvail, ring[vring->num]),
 299                              vring->align);
 300    virtio_init_region_cache(vdev, n);
 301}
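
/*
 * A worked example of the arithmetic above, assuming a 256-entry ring and
 * the default 4096-byte alignment (the numbers follow from the struct
 * sizes only, not from any particular device):
 *
 *   avail = desc + 256 * sizeof(VRingDesc)            = desc + 4096
 *   used  = vring_align(avail + 4 + 2 * 256, 4096)    = desc + 8192
 */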
 302
 303/* Called within rcu_read_lock().  */
 304static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 305                                  MemoryRegionCache *cache, int i)
 306{
 307    address_space_read_cached(cache, i * sizeof(VRingDesc),
 308                              desc, sizeof(VRingDesc));
 309    virtio_tswap64s(vdev, &desc->addr);
 310    virtio_tswap32s(vdev, &desc->len);
 311    virtio_tswap16s(vdev, &desc->flags);
 312    virtio_tswap16s(vdev, &desc->next);
 313}
 314
 315static void vring_packed_event_read(VirtIODevice *vdev,
 316                                    MemoryRegionCache *cache,
 317                                    VRingPackedDescEvent *e)
 318{
 319    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
 320    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 321
 322    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
 323    /* Make sure flags is seen before off_wrap */
 324    smp_rmb();
 325    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
 326    virtio_tswap16s(vdev, &e->flags);
 327}
 328
 329static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 330                                        MemoryRegionCache *cache,
 331                                        uint16_t off_wrap)
 332{
 333    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 334
 335    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
 336    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 337}
 338
 339static void vring_packed_flags_write(VirtIODevice *vdev,
 340                                     MemoryRegionCache *cache, uint16_t flags)
 341{
 342    hwaddr off = offsetof(VRingPackedDescEvent, flags);
 343
 344    virtio_stw_phys_cached(vdev, cache, off, flags);
 345    address_space_cache_invalidate(cache, off, sizeof(flags));
 346}
 347
 348/* Called within rcu_read_lock().  */
 349static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 350{
 351    return qatomic_rcu_read(&vq->vring.caches);
 352}
 353
 354/* Called within rcu_read_lock().  */
 355static inline uint16_t vring_avail_flags(VirtQueue *vq)
 356{
 357    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 358    hwaddr pa = offsetof(VRingAvail, flags);
 359
 360    if (!caches) {
 361        return 0;
 362    }
 363
 364    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 365}
 366
 367/* Called within rcu_read_lock().  */
 368static inline uint16_t vring_avail_idx(VirtQueue *vq)
 369{
 370    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 371    hwaddr pa = offsetof(VRingAvail, idx);
 372
 373    if (!caches) {
 374        return 0;
 375    }
 376
 377    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 378    return vq->shadow_avail_idx;
 379}
 380
 381/* Called within rcu_read_lock().  */
 382static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
 383{
 384    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 385    hwaddr pa = offsetof(VRingAvail, ring[i]);
 386
 387    if (!caches) {
 388        return 0;
 389    }
 390
 391    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
 392}
 393
 394/* Called within rcu_read_lock().  */
 395static inline uint16_t vring_get_used_event(VirtQueue *vq)
 396{
 397    return vring_avail_ring(vq, vq->vring.num);
 398}
 399
 400/* Called within rcu_read_lock().  */
 401static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
 402                                    int i)
 403{
 404    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 405    hwaddr pa = offsetof(VRingUsed, ring[i]);
 406
 407    if (!caches) {
 408        return;
 409    }
 410
 411    virtio_tswap32s(vq->vdev, &uelem->id);
 412    virtio_tswap32s(vq->vdev, &uelem->len);
 413    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
 414    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
 415}
 416
 417/* Called within rcu_read_lock(). */
 418static inline uint16_t vring_used_flags(VirtQueue *vq)
 419{
 420    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 421    hwaddr pa = offsetof(VRingUsed, flags);
 422
 423    if (!caches) {
 424        return 0;
 425    }
 426
 427    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 428}
 429
 430/* Called within rcu_read_lock().  */
 431static uint16_t vring_used_idx(VirtQueue *vq)
 432{
 433    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 434    hwaddr pa = offsetof(VRingUsed, idx);
 435
 436    if (!caches) {
 437        return 0;
 438    }
 439
 440    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 441}
 442
 443/* Called within rcu_read_lock().  */
 444static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
 445{
 446    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 447    hwaddr pa = offsetof(VRingUsed, idx);
 448
 449    if (caches) {
 450        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 451        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 452    }
 453
 454    vq->used_idx = val;
 455}
 456
 457/* Called within rcu_read_lock().  */
 458static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
 459{
 460    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 461    VirtIODevice *vdev = vq->vdev;
 462    hwaddr pa = offsetof(VRingUsed, flags);
 463    uint16_t flags;
 464
 465    if (!caches) {
 466        return;
 467    }
 468
 469    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 470    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
 471    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 472}
 473
 474/* Called within rcu_read_lock().  */
 475static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
 476{
 477    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
 478    VirtIODevice *vdev = vq->vdev;
 479    hwaddr pa = offsetof(VRingUsed, flags);
 480    uint16_t flags;
 481
 482    if (!caches) {
 483        return;
 484    }
 485
 486    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
 487    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
 488    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
 489}
 490
 491/* Called within rcu_read_lock().  */
 492static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 493{
 494    VRingMemoryRegionCaches *caches;
 495    hwaddr pa;
 496    if (!vq->notification) {
 497        return;
 498    }
 499
 500    caches = vring_get_region_caches(vq);
 501    if (!caches) {
 502        return;
 503    }
 504
 505    pa = offsetof(VRingUsed, ring[vq->vring.num]);
 506    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
 507    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 508}
 509
 510static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
 511{
 512    RCU_READ_LOCK_GUARD();
 513
 514    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 515        vring_set_avail_event(vq, vring_avail_idx(vq));
 516    } else if (enable) {
 517        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
 518    } else {
 519        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
 520    }
 521    if (enable) {
 522        /* Expose avail event/used flags before caller checks the avail idx. */
 523        smp_mb();
 524    }
 525}
 526
 527static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
 528{
 529    uint16_t off_wrap;
 530    VRingPackedDescEvent e;
 531    VRingMemoryRegionCaches *caches;
 532
 533    RCU_READ_LOCK_GUARD();
 534    caches = vring_get_region_caches(vq);
 535    if (!caches) {
 536        return;
 537    }
 538
 539    vring_packed_event_read(vq->vdev, &caches->used, &e);
 540
 541    if (!enable) {
 542        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
 543    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 544        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
 545        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
  546        /* Make sure off_wrap is written before flags */
 547        smp_wmb();
 548        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
 549    } else {
 550        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
 551    }
 552
 553    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
 554    if (enable) {
 555        /* Expose avail event/used flags before caller checks the avail idx. */
 556        smp_mb();
 557    }
 558}
 559
 560bool virtio_queue_get_notification(VirtQueue *vq)
 561{
 562    return vq->notification;
 563}
 564
 565void virtio_queue_set_notification(VirtQueue *vq, int enable)
 566{
 567    vq->notification = enable;
 568
 569    if (!vq->vring.desc) {
 570        return;
 571    }
 572
 573    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 574        virtio_queue_packed_set_notification(vq, enable);
 575    } else {
 576        virtio_queue_split_set_notification(vq, enable);
 577    }
 578}
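
/*
 * Notification suppression differs between the two ring formats.  For
 * split rings the device either toggles VRING_USED_F_NO_NOTIFY in the
 * used ring, or, when VIRTIO_RING_F_EVENT_IDX was negotiated, publishes
 * the next avail index it cares about in the avail-event word so the
 * guest only kicks when genuinely new buffers appear.  For packed rings
 * the equivalent flags and offset/wrap value live in the event
 * suppression structure (VRingPackedDescEvent) written above.  In both
 * cases the final smp_mb() orders the update against the caller's
 * subsequent check of the avail index, so code that re-enables
 * notifications should still re-check virtio_queue_empty() afterwards.
 */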
 579
 580int virtio_queue_ready(VirtQueue *vq)
 581{
 582    return vq->vring.avail != 0;
 583}
 584
 585static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 586                                         uint16_t *flags,
 587                                         MemoryRegionCache *cache,
 588                                         int i)
 589{
 590    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 591
 592    *flags = virtio_lduw_phys_cached(vdev, cache, off);
 593}
 594
 595static void vring_packed_desc_read(VirtIODevice *vdev,
 596                                   VRingPackedDesc *desc,
 597                                   MemoryRegionCache *cache,
 598                                   int i, bool strict_order)
 599{
 600    hwaddr off = i * sizeof(VRingPackedDesc);
 601
 602    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
 603
 604    if (strict_order) {
  605        /* Make sure flags is read before the rest of the fields. */
 606        smp_rmb();
 607    }
 608
 609    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
 610                              &desc->addr, sizeof(desc->addr));
 611    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
 612                              &desc->id, sizeof(desc->id));
 613    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
 614                              &desc->len, sizeof(desc->len));
 615    virtio_tswap64s(vdev, &desc->addr);
 616    virtio_tswap16s(vdev, &desc->id);
 617    virtio_tswap32s(vdev, &desc->len);
 618}
 619
 620static void vring_packed_desc_write_data(VirtIODevice *vdev,
 621                                         VRingPackedDesc *desc,
 622                                         MemoryRegionCache *cache,
 623                                         int i)
 624{
 625    hwaddr off_id = i * sizeof(VRingPackedDesc) +
 626                    offsetof(VRingPackedDesc, id);
 627    hwaddr off_len = i * sizeof(VRingPackedDesc) +
 628                    offsetof(VRingPackedDesc, len);
 629
 630    virtio_tswap32s(vdev, &desc->len);
 631    virtio_tswap16s(vdev, &desc->id);
 632    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
 633    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
 634    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
 635    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
 636}
 637
 638static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 639                                          VRingPackedDesc *desc,
 640                                          MemoryRegionCache *cache,
 641                                          int i)
 642{
 643    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 644
 645    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
 646    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 647}
 648
 649static void vring_packed_desc_write(VirtIODevice *vdev,
 650                                    VRingPackedDesc *desc,
 651                                    MemoryRegionCache *cache,
 652                                    int i, bool strict_order)
 653{
 654    vring_packed_desc_write_data(vdev, desc, cache, i);
 655    if (strict_order) {
  656        /* Make sure data is written before flags. */
 657        smp_wmb();
 658    }
 659    vring_packed_desc_write_flags(vdev, desc, cache, i);
 660}
 661
 662static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
 663{
 664    bool avail, used;
 665
 666    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 667    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 668    return (avail != used) && (avail == wrap_counter);
 669}
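
/*
 * Packed-ring availability, for reference: each descriptor carries an
 * AVAIL and a USED flag bit.  The driver marks a descriptor available by
 * setting AVAIL to its current wrap counter and USED to the inverse; the
 * device marks it used by setting both bits to its own wrap counter (see
 * virtqueue_packed_fill_desc() below).  A descriptor is therefore
 * available to us exactly when AVAIL != USED and AVAIL matches the wrap
 * counter we expect, which is the test above.
 */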
 670
 671/* Fetch avail_idx from VQ memory only when we really need to know if
 672 * guest has added some buffers.
 673 * Called within rcu_read_lock().  */
 674static int virtio_queue_empty_rcu(VirtQueue *vq)
 675{
 676    if (virtio_device_disabled(vq->vdev)) {
 677        return 1;
 678    }
 679
 680    if (unlikely(!vq->vring.avail)) {
 681        return 1;
 682    }
 683
 684    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 685        return 0;
 686    }
 687
 688    return vring_avail_idx(vq) == vq->last_avail_idx;
 689}
 690
 691static int virtio_queue_split_empty(VirtQueue *vq)
 692{
 693    bool empty;
 694
 695    if (virtio_device_disabled(vq->vdev)) {
 696        return 1;
 697    }
 698
 699    if (unlikely(!vq->vring.avail)) {
 700        return 1;
 701    }
 702
 703    if (vq->shadow_avail_idx != vq->last_avail_idx) {
 704        return 0;
 705    }
 706
 707    RCU_READ_LOCK_GUARD();
 708    empty = vring_avail_idx(vq) == vq->last_avail_idx;
 709    return empty;
 710}
 711
 712/* Called within rcu_read_lock().  */
 713static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
 714{
 715    struct VRingPackedDesc desc;
 716    VRingMemoryRegionCaches *cache;
 717
 718    if (unlikely(!vq->vring.desc)) {
 719        return 1;
 720    }
 721
 722    cache = vring_get_region_caches(vq);
 723    if (!cache) {
 724        return 1;
 725    }
 726
 727    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
 728                                 vq->last_avail_idx);
 729
 730    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
 731}
 732
 733static int virtio_queue_packed_empty(VirtQueue *vq)
 734{
 735    RCU_READ_LOCK_GUARD();
 736    return virtio_queue_packed_empty_rcu(vq);
 737}
 738
 739int virtio_queue_empty(VirtQueue *vq)
 740{
 741    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 742        return virtio_queue_packed_empty(vq);
 743    } else {
 744        return virtio_queue_split_empty(vq);
 745    }
 746}
 747
 748static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
 749                               unsigned int len)
 750{
 751    AddressSpace *dma_as = vq->vdev->dma_as;
 752    unsigned int offset;
 753    int i;
 754
 755    offset = 0;
 756    for (i = 0; i < elem->in_num; i++) {
 757        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 758
 759        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
 760                         elem->in_sg[i].iov_len,
 761                         DMA_DIRECTION_FROM_DEVICE, size);
 762
 763        offset += size;
 764    }
 765
  766    for (i = 0; i < elem->out_num; i++) {
  767        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
  768                         elem->out_sg[i].iov_len,
  769                         DMA_DIRECTION_TO_DEVICE,
  770                         elem->out_sg[i].iov_len);
    }
 771}
 772
 773/* virtqueue_detach_element:
 774 * @vq: The #VirtQueue
 775 * @elem: The #VirtQueueElement
 776 * @len: number of bytes written
 777 *
 778 * Detach the element from the virtqueue.  This function is suitable for device
 779 * reset or other situations where a #VirtQueueElement is simply freed and will
 780 * not be pushed or discarded.
 781 */
 782void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
 783                              unsigned int len)
 784{
 785    vq->inuse -= elem->ndescs;
 786    virtqueue_unmap_sg(vq, elem, len);
 787}
 788
 789static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
 790{
 791    vq->last_avail_idx -= num;
 792}
 793
 794static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
 795{
 796    if (vq->last_avail_idx < num) {
 797        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
 798        vq->last_avail_wrap_counter ^= 1;
 799    } else {
 800        vq->last_avail_idx -= num;
 801    }
 802}
 803
 804/* virtqueue_unpop:
 805 * @vq: The #VirtQueue
 806 * @elem: The #VirtQueueElement
 807 * @len: number of bytes written
 808 *
 809 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 810 * call to virtqueue_pop() will refetch the element.
 811 */
 812void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
 813                     unsigned int len)
 814{
 815
 816    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 817        virtqueue_packed_rewind(vq, 1);
 818    } else {
 819        virtqueue_split_rewind(vq, 1);
 820    }
 821
 822    virtqueue_detach_element(vq, elem, len);
 823}
 824
 825/* virtqueue_rewind:
 826 * @vq: The #VirtQueue
 827 * @num: Number of elements to push back
 828 *
 829 * Pretend that elements weren't popped from the virtqueue.  The next
 830 * virtqueue_pop() will refetch the oldest element.
 831 *
 832 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 833 *
 834 * Returns: true on success, false if @num is greater than the number of in use
 835 * elements.
 836 */
 837bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 838{
 839    if (num > vq->inuse) {
 840        return false;
 841    }
 842
 843    vq->inuse -= num;
 844    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 845        virtqueue_packed_rewind(vq, num);
 846    } else {
 847        virtqueue_split_rewind(vq, num);
 848    }
 849    return true;
 850}
 851
 852static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 853                    unsigned int len, unsigned int idx)
 854{
 855    VRingUsedElem uelem;
 856
 857    if (unlikely(!vq->vring.used)) {
 858        return;
 859    }
 860
 861    idx = (idx + vq->used_idx) % vq->vring.num;
 862
 863    uelem.id = elem->index;
 864    uelem.len = len;
 865    vring_used_write(vq, &uelem, idx);
 866}
 867
 868static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
 869                                  unsigned int len, unsigned int idx)
 870{
 871    vq->used_elems[idx].index = elem->index;
 872    vq->used_elems[idx].len = len;
 873    vq->used_elems[idx].ndescs = elem->ndescs;
 874}
 875
 876static void virtqueue_packed_fill_desc(VirtQueue *vq,
 877                                       const VirtQueueElement *elem,
 878                                       unsigned int idx,
 879                                       bool strict_order)
 880{
 881    uint16_t head;
 882    VRingMemoryRegionCaches *caches;
 883    VRingPackedDesc desc = {
 884        .id = elem->index,
 885        .len = elem->len,
 886    };
 887    bool wrap_counter = vq->used_wrap_counter;
 888
 889    if (unlikely(!vq->vring.desc)) {
 890        return;
 891    }
 892
 893    head = vq->used_idx + idx;
 894    if (head >= vq->vring.num) {
 895        head -= vq->vring.num;
 896        wrap_counter ^= 1;
 897    }
 898    if (wrap_counter) {
 899        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
 900        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
 901    } else {
 902        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
 903        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
 904    }
 905
 906    caches = vring_get_region_caches(vq);
 907    if (!caches) {
 908        return;
 909    }
 910
 911    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
 912}
 913
 914/* Called within rcu_read_lock().  */
 915void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 916                    unsigned int len, unsigned int idx)
 917{
 918    trace_virtqueue_fill(vq, elem, len, idx);
 919
 920    virtqueue_unmap_sg(vq, elem, len);
 921
 922    if (virtio_device_disabled(vq->vdev)) {
 923        return;
 924    }
 925
 926    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 927        virtqueue_packed_fill(vq, elem, len, idx);
 928    } else {
 929        virtqueue_split_fill(vq, elem, len, idx);
 930    }
 931}
 932
 933/* Called within rcu_read_lock().  */
 934static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
 935{
 936    uint16_t old, new;
 937
 938    if (unlikely(!vq->vring.used)) {
 939        return;
 940    }
 941
 942    /* Make sure buffer is written before we update index. */
 943    smp_wmb();
 944    trace_virtqueue_flush(vq, count);
 945    old = vq->used_idx;
 946    new = old + count;
 947    vring_used_idx_set(vq, new);
 948    vq->inuse -= count;
  949    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
  950        vq->signalled_used_valid = false;
    }
 951}
 952
 953static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
 954{
 955    unsigned int i, ndescs = 0;
 956
 957    if (unlikely(!vq->vring.desc)) {
 958        return;
 959    }
 960
 961    for (i = 1; i < count; i++) {
 962        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
 963        ndescs += vq->used_elems[i].ndescs;
 964    }
 965    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
 966    ndescs += vq->used_elems[0].ndescs;
 967
 968    vq->inuse -= ndescs;
 969    vq->used_idx += ndescs;
 970    if (vq->used_idx >= vq->vring.num) {
 971        vq->used_idx -= vq->vring.num;
 972        vq->used_wrap_counter ^= 1;
 973        vq->signalled_used_valid = false;
 974    }
 975}
 976
 977void virtqueue_flush(VirtQueue *vq, unsigned int count)
 978{
 979    if (virtio_device_disabled(vq->vdev)) {
 980        vq->inuse -= count;
 981        return;
 982    }
 983
 984    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
 985        virtqueue_packed_flush(vq, count);
 986    } else {
 987        virtqueue_split_flush(vq, count);
 988    }
 989}
 990
 991void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 992                    unsigned int len)
 993{
 994    RCU_READ_LOCK_GUARD();
 995    virtqueue_fill(vq, elem, len, 0);
 996    virtqueue_flush(vq, 1);
 997}
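
/*
 * virtqueue_push() is the common single-element case: fill slot 0 and
 * flush one entry under one RCU read lock.  A device that completes
 * several requests at once can batch the used-ring update instead; a
 * minimal sketch, where "elems", "lens" and "n" are the device's own
 * bookkeeping:
 *
 *     RCU_READ_LOCK_GUARD();
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 */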
 998
 999/* Called within rcu_read_lock().  */
1000static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1001{
1002    uint16_t num_heads = vring_avail_idx(vq) - idx;
1003
1004    /* Check it isn't doing very strange things with descriptor numbers. */
1005    if (num_heads > vq->vring.num) {
1006        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1007                     idx, vq->shadow_avail_idx);
1008        return -EINVAL;
1009    }
1010    /* On success, callers read a descriptor at vq->last_avail_idx.
1011     * Make sure descriptor read does not bypass avail index read. */
1012    if (num_heads) {
1013        smp_rmb();
1014    }
1015
1016    return num_heads;
1017}
1018
1019/* Called within rcu_read_lock().  */
1020static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1021                               unsigned int *head)
1022{
1023    /* Grab the next descriptor number they're advertising, and increment
1024     * the index we've seen. */
1025    *head = vring_avail_ring(vq, idx % vq->vring.num);
1026
1027    /* If their number is silly, that's a fatal mistake. */
1028    if (*head >= vq->vring.num) {
1029        virtio_error(vq->vdev, "Guest says index %u is available", *head);
1030        return false;
1031    }
1032
1033    return true;
1034}
1035
1036enum {
1037    VIRTQUEUE_READ_DESC_ERROR = -1,
1038    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1039    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1040};
1041
1042static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1043                                          MemoryRegionCache *desc_cache,
1044                                          unsigned int max, unsigned int *next)
1045{
1046    /* If this descriptor says it doesn't chain, we're done. */
1047    if (!(desc->flags & VRING_DESC_F_NEXT)) {
1048        return VIRTQUEUE_READ_DESC_DONE;
1049    }
1050
 1051    /* Check they're not leading us off the end of the descriptor table. */
1052    *next = desc->next;
1053    /* Make sure compiler knows to grab that: we don't want it changing! */
1054    smp_wmb();
1055
1056    if (*next >= max) {
1057        virtio_error(vdev, "Desc next is %u", *next);
1058        return VIRTQUEUE_READ_DESC_ERROR;
1059    }
1060
1061    vring_split_desc_read(vdev, desc, desc_cache, *next);
1062    return VIRTQUEUE_READ_DESC_MORE;
1063}
1064
1065/* Called within rcu_read_lock().  */
1066static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1067                            unsigned int *in_bytes, unsigned int *out_bytes,
1068                            unsigned max_in_bytes, unsigned max_out_bytes,
1069                            VRingMemoryRegionCaches *caches)
1070{
1071    VirtIODevice *vdev = vq->vdev;
1072    unsigned int idx;
1073    unsigned int total_bufs, in_total, out_total;
1074    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1075    int64_t len = 0;
1076    int rc;
1077
1078    idx = vq->last_avail_idx;
1079    total_bufs = in_total = out_total = 0;
1080
1081    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1082        MemoryRegionCache *desc_cache = &caches->desc;
1083        unsigned int num_bufs;
1084        VRingDesc desc;
1085        unsigned int i;
1086        unsigned int max = vq->vring.num;
1087
1088        num_bufs = total_bufs;
1089
1090        if (!virtqueue_get_head(vq, idx++, &i)) {
1091            goto err;
1092        }
1093
1094        vring_split_desc_read(vdev, &desc, desc_cache, i);
1095
1096        if (desc.flags & VRING_DESC_F_INDIRECT) {
1097            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1098                virtio_error(vdev, "Invalid size for indirect buffer table");
1099                goto err;
1100            }
1101
1102            /* If we've got too many, that implies a descriptor loop. */
1103            if (num_bufs >= max) {
1104                virtio_error(vdev, "Looped descriptor");
1105                goto err;
1106            }
1107
1108            /* loop over the indirect descriptor table */
1109            len = address_space_cache_init(&indirect_desc_cache,
1110                                           vdev->dma_as,
1111                                           desc.addr, desc.len, false);
1112            desc_cache = &indirect_desc_cache;
1113            if (len < desc.len) {
1114                virtio_error(vdev, "Cannot map indirect buffer");
1115                goto err;
1116            }
1117
1118            max = desc.len / sizeof(VRingDesc);
1119            num_bufs = i = 0;
1120            vring_split_desc_read(vdev, &desc, desc_cache, i);
1121        }
1122
1123        do {
1124            /* If we've got too many, that implies a descriptor loop. */
1125            if (++num_bufs > max) {
1126                virtio_error(vdev, "Looped descriptor");
1127                goto err;
1128            }
1129
1130            if (desc.flags & VRING_DESC_F_WRITE) {
1131                in_total += desc.len;
1132            } else {
1133                out_total += desc.len;
1134            }
1135            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1136                goto done;
1137            }
1138
1139            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1140        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1141
1142        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1143            goto err;
1144        }
1145
1146        if (desc_cache == &indirect_desc_cache) {
1147            address_space_cache_destroy(&indirect_desc_cache);
1148            total_bufs++;
1149        } else {
1150            total_bufs = num_bufs;
1151        }
1152    }
1153
1154    if (rc < 0) {
1155        goto err;
1156    }
1157
1158done:
1159    address_space_cache_destroy(&indirect_desc_cache);
1160    if (in_bytes) {
1161        *in_bytes = in_total;
1162    }
1163    if (out_bytes) {
1164        *out_bytes = out_total;
1165    }
1166    return;
1167
1168err:
1169    in_total = out_total = 0;
1170    goto done;
1171}
1172
1173static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1174                                           VRingPackedDesc *desc,
1175                                           MemoryRegionCache
1176                                           *desc_cache,
1177                                           unsigned int max,
1178                                           unsigned int *next,
1179                                           bool indirect)
1180{
1181    /* If this descriptor says it doesn't chain, we're done. */
1182    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1183        return VIRTQUEUE_READ_DESC_DONE;
1184    }
1185
1186    ++*next;
1187    if (*next == max) {
1188        if (indirect) {
1189            return VIRTQUEUE_READ_DESC_DONE;
1190        } else {
1191            (*next) -= vq->vring.num;
1192        }
1193    }
1194
1195    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1196    return VIRTQUEUE_READ_DESC_MORE;
1197}
1198
1199/* Called within rcu_read_lock().  */
1200static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1201                                             unsigned int *in_bytes,
1202                                             unsigned int *out_bytes,
1203                                             unsigned max_in_bytes,
1204                                             unsigned max_out_bytes,
1205                                             VRingMemoryRegionCaches *caches)
1206{
1207    VirtIODevice *vdev = vq->vdev;
1208    unsigned int idx;
1209    unsigned int total_bufs, in_total, out_total;
1210    MemoryRegionCache *desc_cache;
1211    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1212    int64_t len = 0;
1213    VRingPackedDesc desc;
1214    bool wrap_counter;
1215
1216    idx = vq->last_avail_idx;
1217    wrap_counter = vq->last_avail_wrap_counter;
1218    total_bufs = in_total = out_total = 0;
1219
1220    for (;;) {
1221        unsigned int num_bufs = total_bufs;
1222        unsigned int i = idx;
1223        int rc;
1224        unsigned int max = vq->vring.num;
1225
1226        desc_cache = &caches->desc;
1227
1228        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1229        if (!is_desc_avail(desc.flags, wrap_counter)) {
1230            break;
1231        }
1232
1233        if (desc.flags & VRING_DESC_F_INDIRECT) {
1234            if (desc.len % sizeof(VRingPackedDesc)) {
1235                virtio_error(vdev, "Invalid size for indirect buffer table");
1236                goto err;
1237            }
1238
1239            /* If we've got too many, that implies a descriptor loop. */
1240            if (num_bufs >= max) {
1241                virtio_error(vdev, "Looped descriptor");
1242                goto err;
1243            }
1244
1245            /* loop over the indirect descriptor table */
1246            len = address_space_cache_init(&indirect_desc_cache,
1247                                           vdev->dma_as,
1248                                           desc.addr, desc.len, false);
1249            desc_cache = &indirect_desc_cache;
1250            if (len < desc.len) {
1251                virtio_error(vdev, "Cannot map indirect buffer");
1252                goto err;
1253            }
1254
1255            max = desc.len / sizeof(VRingPackedDesc);
1256            num_bufs = i = 0;
1257            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1258        }
1259
1260        do {
1261            /* If we've got too many, that implies a descriptor loop. */
1262            if (++num_bufs > max) {
1263                virtio_error(vdev, "Looped descriptor");
1264                goto err;
1265            }
1266
1267            if (desc.flags & VRING_DESC_F_WRITE) {
1268                in_total += desc.len;
1269            } else {
1270                out_total += desc.len;
1271            }
1272            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1273                goto done;
1274            }
1275
1276            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1277                                                 &i, desc_cache ==
1278                                                 &indirect_desc_cache);
1279        } while (rc == VIRTQUEUE_READ_DESC_MORE);
1280
1281        if (desc_cache == &indirect_desc_cache) {
1282            address_space_cache_destroy(&indirect_desc_cache);
1283            total_bufs++;
1284            idx++;
1285        } else {
1286            idx += num_bufs - total_bufs;
1287            total_bufs = num_bufs;
1288        }
1289
1290        if (idx >= vq->vring.num) {
1291            idx -= vq->vring.num;
1292            wrap_counter ^= 1;
1293        }
1294    }
1295
1296    /* Record the index and wrap counter for a kick we want */
1297    vq->shadow_avail_idx = idx;
1298    vq->shadow_avail_wrap_counter = wrap_counter;
1299done:
1300    address_space_cache_destroy(&indirect_desc_cache);
1301    if (in_bytes) {
1302        *in_bytes = in_total;
1303    }
1304    if (out_bytes) {
1305        *out_bytes = out_total;
1306    }
1307    return;
1308
1309err:
1310    in_total = out_total = 0;
1311    goto done;
1312}
1313
1314void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1315                               unsigned int *out_bytes,
1316                               unsigned max_in_bytes, unsigned max_out_bytes)
1317{
1318    uint16_t desc_size;
1319    VRingMemoryRegionCaches *caches;
1320
1321    RCU_READ_LOCK_GUARD();
1322
1323    if (unlikely(!vq->vring.desc)) {
1324        goto err;
1325    }
1326
1327    caches = vring_get_region_caches(vq);
1328    if (!caches) {
1329        goto err;
1330    }
1331
1332    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1333                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1334    if (caches->desc.len < vq->vring.num * desc_size) {
1335        virtio_error(vq->vdev, "Cannot map descriptor ring");
1336        goto err;
1337    }
1338
1339    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1340        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1341                                         max_in_bytes, max_out_bytes,
1342                                         caches);
1343    } else {
1344        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1345                                        max_in_bytes, max_out_bytes,
1346                                        caches);
1347    }
1348
1349    return;
1350err:
1351    if (in_bytes) {
1352        *in_bytes = 0;
1353    }
1354    if (out_bytes) {
1355        *out_bytes = 0;
1356    }
1357}
1358
1359int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1360                          unsigned int out_bytes)
1361{
1362    unsigned int in_total, out_total;
1363
1364    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1365    return in_bytes <= in_total && out_bytes <= out_total;
1366}
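
/*
 * Usage sketch: a device that needs a known amount of buffer space before
 * starting a transfer can check without popping anything, e.g.
 *
 *     if (!virtqueue_avail_bytes(vq, in_needed, out_needed)) {
 *         return;    // wait for the guest to add more buffers
 *     }
 *
 * where in_needed/out_needed are whatever the device requires; the call
 * walks the available descriptor chains but leaves them on the ring.
 */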
1367
1368static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1369                               hwaddr *addr, struct iovec *iov,
1370                               unsigned int max_num_sg, bool is_write,
1371                               hwaddr pa, size_t sz)
1372{
1373    bool ok = false;
1374    unsigned num_sg = *p_num_sg;
1375    assert(num_sg <= max_num_sg);
1376
1377    if (!sz) {
1378        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1379        goto out;
1380    }
1381
1382    while (sz) {
1383        hwaddr len = sz;
1384
1385        if (num_sg == max_num_sg) {
1386            virtio_error(vdev, "virtio: too many write descriptors in "
1387                               "indirect table");
1388            goto out;
1389        }
1390
1391        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1392                                              is_write ?
1393                                              DMA_DIRECTION_FROM_DEVICE :
1394                                              DMA_DIRECTION_TO_DEVICE,
1395                                              MEMTXATTRS_UNSPECIFIED);
1396        if (!iov[num_sg].iov_base) {
1397            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1398            goto out;
1399        }
1400
1401        iov[num_sg].iov_len = len;
1402        addr[num_sg] = pa;
1403
1404        sz -= len;
1405        pa += len;
1406        num_sg++;
1407    }
1408    ok = true;
1409
1410out:
1411    *p_num_sg = num_sg;
1412    return ok;
1413}
1414
1415/* Only used by error code paths before we have a VirtQueueElement (therefore
1416 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1417 * yet.
1418 */
1419static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1420                                    struct iovec *iov)
1421{
1422    unsigned int i;
1423
1424    for (i = 0; i < out_num + in_num; i++) {
1425        int is_write = i >= out_num;
1426
1427        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1428        iov++;
1429    }
1430}
1431
1432static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1433                                hwaddr *addr, unsigned int num_sg,
1434                                bool is_write)
1435{
1436    unsigned int i;
1437    hwaddr len;
1438
1439    for (i = 0; i < num_sg; i++) {
1440        len = sg[i].iov_len;
1441        sg[i].iov_base = dma_memory_map(vdev->dma_as,
1442                                        addr[i], &len, is_write ?
1443                                        DMA_DIRECTION_FROM_DEVICE :
1444                                        DMA_DIRECTION_TO_DEVICE,
1445                                        MEMTXATTRS_UNSPECIFIED);
1446        if (!sg[i].iov_base) {
1447            error_report("virtio: error trying to map MMIO memory");
1448            exit(1);
1449        }
1450        if (len != sg[i].iov_len) {
1451            error_report("virtio: unexpected memory split");
1452            exit(1);
1453        }
1454    }
1455}
1456
1457void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1458{
1459    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1460    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1461                                                                        false);
1462}
1463
1464static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1465{
1466    VirtQueueElement *elem;
1467    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1468    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1469    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1470    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1471    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1472    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1473
1474    assert(sz >= sizeof(VirtQueueElement));
1475    elem = g_malloc(out_sg_end);
1476    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1477    elem->out_num = out_num;
1478    elem->in_num = in_num;
1479    elem->in_addr = (void *)elem + in_addr_ofs;
1480    elem->out_addr = (void *)elem + out_addr_ofs;
1481    elem->in_sg = (void *)elem + in_sg_ofs;
1482    elem->out_sg = (void *)elem + out_sg_ofs;
1483    return elem;
1484}
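
/*
 * The element is a single allocation: a caller-defined header of sz bytes
 * (at least a VirtQueueElement) followed by the in_addr[], out_addr[],
 * in_sg[] and out_sg[] arrays, each aligned for its type.  Devices
 * typically pass the size of their own request structure, which embeds a
 * VirtQueueElement as its first member, so per-request state and the
 * scatter-gather arrays are freed together with a single g_free().
 */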
1485
1486static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1487{
1488    unsigned int i, head, max;
1489    VRingMemoryRegionCaches *caches;
1490    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1491    MemoryRegionCache *desc_cache;
1492    int64_t len;
1493    VirtIODevice *vdev = vq->vdev;
1494    VirtQueueElement *elem = NULL;
1495    unsigned out_num, in_num, elem_entries;
1496    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1497    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1498    VRingDesc desc;
1499    int rc;
1500
1501    RCU_READ_LOCK_GUARD();
1502    if (virtio_queue_empty_rcu(vq)) {
1503        goto done;
1504    }
1505    /* Needed after virtio_queue_empty(), see comment in
1506     * virtqueue_num_heads(). */
1507    smp_rmb();
1508
 1509    /* When we start there are no input or output buffers yet. */
1510    out_num = in_num = elem_entries = 0;
1511
1512    max = vq->vring.num;
1513
1514    if (vq->inuse >= vq->vring.num) {
1515        virtio_error(vdev, "Virtqueue size exceeded");
1516        goto done;
1517    }
1518
1519    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1520        goto done;
1521    }
1522
1523    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1524        vring_set_avail_event(vq, vq->last_avail_idx);
1525    }
1526
1527    i = head;
1528
1529    caches = vring_get_region_caches(vq);
1530    if (!caches) {
1531        virtio_error(vdev, "Region caches not initialized");
1532        goto done;
1533    }
1534
1535    if (caches->desc.len < max * sizeof(VRingDesc)) {
1536        virtio_error(vdev, "Cannot map descriptor ring");
1537        goto done;
1538    }
1539
1540    desc_cache = &caches->desc;
1541    vring_split_desc_read(vdev, &desc, desc_cache, i);
1542    if (desc.flags & VRING_DESC_F_INDIRECT) {
1543        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1544            virtio_error(vdev, "Invalid size for indirect buffer table");
1545            goto done;
1546        }
1547
1548        /* loop over the indirect descriptor table */
1549        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1550                                       desc.addr, desc.len, false);
1551        desc_cache = &indirect_desc_cache;
1552        if (len < desc.len) {
1553            virtio_error(vdev, "Cannot map indirect buffer");
1554            goto done;
1555        }
1556
1557        max = desc.len / sizeof(VRingDesc);
1558        i = 0;
1559        vring_split_desc_read(vdev, &desc, desc_cache, i);
1560    }
1561
1562    /* Collect all the descriptors */
1563    do {
1564        bool map_ok;
1565
1566        if (desc.flags & VRING_DESC_F_WRITE) {
1567            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1568                                        iov + out_num,
1569                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1570                                        desc.addr, desc.len);
1571        } else {
1572            if (in_num) {
1573                virtio_error(vdev, "Incorrect order for descriptors");
1574                goto err_undo_map;
1575            }
1576            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1577                                        VIRTQUEUE_MAX_SIZE, false,
1578                                        desc.addr, desc.len);
1579        }
1580        if (!map_ok) {
1581            goto err_undo_map;
1582        }
1583
1584        /* If we've got too many, that implies a descriptor loop. */
1585        if (++elem_entries > max) {
1586            virtio_error(vdev, "Looped descriptor");
1587            goto err_undo_map;
1588        }
1589
1590        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1591    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1592
1593    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1594        goto err_undo_map;
1595    }
1596
1597    /* Now copy what we have collected and mapped */
1598    elem = virtqueue_alloc_element(sz, out_num, in_num);
1599    elem->index = head;
1600    elem->ndescs = 1;
1601    for (i = 0; i < out_num; i++) {
1602        elem->out_addr[i] = addr[i];
1603        elem->out_sg[i] = iov[i];
1604    }
1605    for (i = 0; i < in_num; i++) {
1606        elem->in_addr[i] = addr[out_num + i];
1607        elem->in_sg[i] = iov[out_num + i];
1608    }
1609
1610    vq->inuse++;
1611
1612    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1613done:
1614    address_space_cache_destroy(&indirect_desc_cache);
1615
1616    return elem;
1617
1618err_undo_map:
1619    virtqueue_undo_map_desc(out_num, in_num, iov);
1620    goto done;
1621}
1622
1623static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1624{
1625    unsigned int i, max;
1626    VRingMemoryRegionCaches *caches;
1627    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1628    MemoryRegionCache *desc_cache;
1629    int64_t len;
1630    VirtIODevice *vdev = vq->vdev;
1631    VirtQueueElement *elem = NULL;
1632    unsigned out_num, in_num, elem_entries;
1633    hwaddr addr[VIRTQUEUE_MAX_SIZE];
1634    struct iovec iov[VIRTQUEUE_MAX_SIZE];
1635    VRingPackedDesc desc;
1636    uint16_t id;
1637    int rc;
1638
1639    RCU_READ_LOCK_GUARD();
1640    if (virtio_queue_packed_empty_rcu(vq)) {
1641        goto done;
1642    }
1643
 1644    /* When we start there are no input or output buffers yet. */
1645    out_num = in_num = elem_entries = 0;
1646
1647    max = vq->vring.num;
1648
1649    if (vq->inuse >= vq->vring.num) {
1650        virtio_error(vdev, "Virtqueue size exceeded");
1651        goto done;
1652    }
1653
1654    i = vq->last_avail_idx;
1655
1656    caches = vring_get_region_caches(vq);
1657    if (!caches) {
1658        virtio_error(vdev, "Region caches not initialized");
1659        goto done;
1660    }
1661
1662    if (caches->desc.len < max * sizeof(VRingDesc)) {
1663        virtio_error(vdev, "Cannot map descriptor ring");
1664        goto done;
1665    }
1666
1667    desc_cache = &caches->desc;
1668    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1669    id = desc.id;
1670    if (desc.flags & VRING_DESC_F_INDIRECT) {
1671        if (desc.len % sizeof(VRingPackedDesc)) {
1672            virtio_error(vdev, "Invalid size for indirect buffer table");
1673            goto done;
1674        }
1675
1676        /* loop over the indirect descriptor table */
1677        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1678                                       desc.addr, desc.len, false);
1679        desc_cache = &indirect_desc_cache;
1680        if (len < desc.len) {
1681            virtio_error(vdev, "Cannot map indirect buffer");
1682            goto done;
1683        }
1684
1685        max = desc.len / sizeof(VRingPackedDesc);
1686        i = 0;
1687        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1688    }
1689
1690    /* Collect all the descriptors */
1691    do {
1692        bool map_ok;
1693
1694        if (desc.flags & VRING_DESC_F_WRITE) {
1695            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1696                                        iov + out_num,
1697                                        VIRTQUEUE_MAX_SIZE - out_num, true,
1698                                        desc.addr, desc.len);
1699        } else {
1700            if (in_num) {
1701                virtio_error(vdev, "Incorrect order for descriptors");
1702                goto err_undo_map;
1703            }
1704            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1705                                        VIRTQUEUE_MAX_SIZE, false,
1706                                        desc.addr, desc.len);
1707        }
1708        if (!map_ok) {
1709            goto err_undo_map;
1710        }
1711
1712        /* If we've got too many, that implies a descriptor loop. */
1713        if (++elem_entries > max) {
1714            virtio_error(vdev, "Looped descriptor");
1715            goto err_undo_map;
1716        }
1717
1718        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1719                                             desc_cache ==
1720                                             &indirect_desc_cache);
1721    } while (rc == VIRTQUEUE_READ_DESC_MORE);
1722
1723    /* Now copy what we have collected and mapped */
1724    elem = virtqueue_alloc_element(sz, out_num, in_num);
1725    for (i = 0; i < out_num; i++) {
1726        elem->out_addr[i] = addr[i];
1727        elem->out_sg[i] = iov[i];
1728    }
1729    for (i = 0; i < in_num; i++) {
1730        elem->in_addr[i] = addr[out_num + i];
1731        elem->in_sg[i] = iov[out_num + i];
1732    }
1733
1734    elem->index = id;
1735    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1736    vq->last_avail_idx += elem->ndescs;
1737    vq->inuse += elem->ndescs;
1738
1739    if (vq->last_avail_idx >= vq->vring.num) {
1740        vq->last_avail_idx -= vq->vring.num;
1741        vq->last_avail_wrap_counter ^= 1;
1742    }
1743
1744    vq->shadow_avail_idx = vq->last_avail_idx;
1745    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1746
1747    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1748done:
1749    address_space_cache_destroy(&indirect_desc_cache);
1750
1751    return elem;
1752
1753err_undo_map:
1754    virtqueue_undo_map_desc(out_num, in_num, iov);
1755    goto done;
1756}
1757
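/*
 * Common entry point for popping a request: dispatches to the packed or
 * split implementation depending on the negotiated VIRTIO_F_RING_PACKED
 * feature.  sz is the size of the caller's element structure (at least
 * sizeof(VirtQueueElement), which must be its first member) so devices can
 * embed their own per-request state.  Returns NULL when the device is
 * disabled or no buffer is available.
 */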
1758void *virtqueue_pop(VirtQueue *vq, size_t sz)
1759{
1760    if (virtio_device_disabled(vq->vdev)) {
1761        return NULL;
1762    }
1763
1764    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1765        return virtqueue_packed_pop(vq, sz);
1766    } else {
1767        return virtqueue_split_pop(vq, sz);
1768    }
1769}
1770
1771static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1772{
1773    VRingMemoryRegionCaches *caches;
1774    MemoryRegionCache *desc_cache;
1775    unsigned int dropped = 0;
1776    VirtQueueElement elem = {};
1777    VirtIODevice *vdev = vq->vdev;
1778    VRingPackedDesc desc;
1779
1780    RCU_READ_LOCK_GUARD();
1781
1782    caches = vring_get_region_caches(vq);
1783    if (!caches) {
1784        return 0;
1785    }
1786
1787    desc_cache = &caches->desc;
1788
1789    virtio_queue_set_notification(vq, 0);
1790
1791    while (vq->inuse < vq->vring.num) {
1792        unsigned int idx = vq->last_avail_idx;
1793        /*
1794         * Works similarly to virtqueue_pop() but does not map buffers
1795         * and does not allocate any memory.
1796         */
1797        vring_packed_desc_read(vdev, &desc, desc_cache,
1798                               vq->last_avail_idx, true);
1799        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1800            break;
1801        }
1802        elem.index = desc.id;
1803        elem.ndescs = 1;
1804        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1805                                               vq->vring.num, &idx, false)) {
1806            ++elem.ndescs;
1807        }
1808        /*
1809         * immediately push the element, nothing to unmap
1810         * as both in_num and out_num are set to 0.
1811         */
1812        virtqueue_push(vq, &elem, 0);
1813        dropped++;
1814        vq->last_avail_idx += elem.ndescs;
1815        if (vq->last_avail_idx >= vq->vring.num) {
1816            vq->last_avail_idx -= vq->vring.num;
1817            vq->last_avail_wrap_counter ^= 1;
1818        }
1819    }
1820
1821    return dropped;
1822}
1823
1824static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1825{
1826    unsigned int dropped = 0;
1827    VirtQueueElement elem = {};
1828    VirtIODevice *vdev = vq->vdev;
1829    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1830
1831    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1832        /* Works similarly to virtqueue_pop() but does not map buffers
1833         * and does not allocate any memory. */
1834        smp_rmb();
1835        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1836            break;
1837        }
1838        vq->inuse++;
1839        vq->last_avail_idx++;
1840        if (fEventIdx) {
1841            vring_set_avail_event(vq, vq->last_avail_idx);
1842        }
1843        /* immediately push the element, nothing to unmap
1844         * as both in_num and out_num are set to 0 */
1845        virtqueue_push(vq, &elem, 0);
1846        dropped++;
1847    }
1848
1849    return dropped;
1850}
1851
1852/* virtqueue_drop_all:
1853 * @vq: The #VirtQueue
1854 * Drops all queued buffers and indicates them to the guest
1855 * as if they are done. Useful when buffers can not be
1856 * processed but must be returned to the guest.
1857 */
1858unsigned int virtqueue_drop_all(VirtQueue *vq)
1859{
1860    struct VirtIODevice *vdev = vq->vdev;
1861
1862    if (virtio_device_disabled(vq->vdev)) {
1863        return 0;
1864    }
1865
1866    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1867        return virtqueue_packed_drop_all(vq);
1868    } else {
1869        return virtqueue_split_drop_all(vq);
1870    }
1871}
1872
1873/* Reading and writing a structure directly to QEMUFile is *awful*, but
1874 * it is what QEMU has always done by mistake.  We can change it sooner
1875 * or later by bumping the version number of the affected vm states.
1876 * In the meantime, since the in-memory layout of VirtQueueElement
1877 * has changed, we need to marshal to and from the layout that was
1878 * used before the change.
1879 */
1880typedef struct VirtQueueElementOld {
1881    unsigned int index;
1882    unsigned int out_num;
1883    unsigned int in_num;
1884    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1885    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1886    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1887    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1888} VirtQueueElementOld;
1889
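/*
 * Migration format note: the element is transferred as the raw
 * VirtQueueElementOld structure above, followed by a be32 ndescs count when
 * the device offers VIRTIO_F_RING_PACKED.  iov_base pointers are never
 * transferred; virtqueue_map() rebuilds them on load.
 */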
1890void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1891{
1892    VirtQueueElement *elem;
1893    VirtQueueElementOld data;
1894    int i;
1895
1896    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897
1898    /* TODO: teach all callers that this can fail, and return failure instead
1899     * of asserting here.
1900     * This is just one thing (there are probably more) that must be
1901     * fixed before we can allow NDEBUG compilation.
1902     */
1903    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1904    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1905
1906    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1907    elem->index = data.index;
1908
1909    for (i = 0; i < elem->in_num; i++) {
1910        elem->in_addr[i] = data.in_addr[i];
1911    }
1912
1913    for (i = 0; i < elem->out_num; i++) {
1914        elem->out_addr[i] = data.out_addr[i];
1915    }
1916
1917    for (i = 0; i < elem->in_num; i++) {
1918        /* Base is overwritten by virtqueue_map.  */
1919        elem->in_sg[i].iov_base = 0;
1920        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1921    }
1922
1923    for (i = 0; i < elem->out_num; i++) {
1924        /* Base is overwritten by virtqueue_map.  */
1925        elem->out_sg[i].iov_base = 0;
1926        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1927    }
1928
1929    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1930        qemu_get_be32s(f, &elem->ndescs);
1931    }
1932
1933    virtqueue_map(vdev, elem);
1934    return elem;
1935}
1936
1937void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1938                                VirtQueueElement *elem)
1939{
1940    VirtQueueElementOld data;
1941    int i;
1942
1943    memset(&data, 0, sizeof(data));
1944    data.index = elem->index;
1945    data.in_num = elem->in_num;
1946    data.out_num = elem->out_num;
1947
1948    for (i = 0; i < elem->in_num; i++) {
1949        data.in_addr[i] = elem->in_addr[i];
1950    }
1951
1952    for (i = 0; i < elem->out_num; i++) {
1953        data.out_addr[i] = elem->out_addr[i];
1954    }
1955
1956    for (i = 0; i < elem->in_num; i++) {
1957        /* Base is overwritten by virtqueue_map when loading.  Do not
1958         * save it, as it would leak the QEMU address space layout.  */
1959        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1960    }
1961
1962    for (i = 0; i < elem->out_num; i++) {
1963        /* Do not save iov_base as above.  */
1964        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1965    }
1966
1967    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1968        qemu_put_be32s(f, &elem->ndescs);
1969    }
1970
1971    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1972}
1973
1974/* virtio device */
1975static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1976{
1977    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1978    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1979
1980    if (virtio_device_disabled(vdev)) {
1981        return;
1982    }
1983
1984    if (k->notify) {
1985        k->notify(qbus->parent, vector);
1986    }
1987}
1988
1989void virtio_update_irq(VirtIODevice *vdev)
1990{
1991    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1992}
1993
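/*
 * A device offering VIRTIO_F_IOMMU_PLATFORM requires the driver to go
 * through the platform IOMMU/DMA machinery, so a guest that does not accept
 * the feature is rejected here; any further checks are delegated to the
 * device-specific validate_features hook.
 */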
1994static int virtio_validate_features(VirtIODevice *vdev)
1995{
1996    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1997
1998    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1999        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2000        return -EFAULT;
2001    }
2002
2003    if (k->validate_features) {
2004        return k->validate_features(vdev);
2005    } else {
2006        return 0;
2007    }
2008}
2009
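/*
 * Called by the transport when the guest writes the device status.  For
 * VIRTIO 1.0+ devices, setting FEATURES_OK first runs feature validation;
 * on failure the error is returned and vdev->status is left unchanged.
 * A DRIVER_OK transition also updates the device's "started" state.
 */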
2010int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2011{
2012    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2013    trace_virtio_set_status(vdev, val);
2014
2015    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2016        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2017            val & VIRTIO_CONFIG_S_FEATURES_OK) {
2018            int ret = virtio_validate_features(vdev);
2019
2020            if (ret) {
2021                return ret;
2022            }
2023        }
2024    }
2025
2026    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2027        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2028        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2029    }
2030
2031    if (k->set_status) {
2032        k->set_status(vdev, val);
2033    }
2034    vdev->status = val;
2035
2036    return 0;
2037}
2038
2039static enum virtio_device_endian virtio_default_endian(void)
2040{
2041    if (target_words_bigendian()) {
2042        return VIRTIO_DEVICE_ENDIAN_BIG;
2043    } else {
2044        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2045    }
2046}
2047
2048static enum virtio_device_endian virtio_current_cpu_endian(void)
2049{
2050    if (cpu_virtio_is_big_endian(current_cpu)) {
2051        return VIRTIO_DEVICE_ENDIAN_BIG;
2052    } else {
2053        return VIRTIO_DEVICE_ENDIAN_LITTLE;
2054    }
2055}
2056
2057static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2058{
2059    vdev->vq[i].vring.desc = 0;
2060    vdev->vq[i].vring.avail = 0;
2061    vdev->vq[i].vring.used = 0;
2062    vdev->vq[i].last_avail_idx = 0;
2063    vdev->vq[i].shadow_avail_idx = 0;
2064    vdev->vq[i].used_idx = 0;
2065    vdev->vq[i].last_avail_wrap_counter = true;
2066    vdev->vq[i].shadow_avail_wrap_counter = true;
2067    vdev->vq[i].used_wrap_counter = true;
2068    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2069    vdev->vq[i].signalled_used = 0;
2070    vdev->vq[i].signalled_used_valid = false;
2071    vdev->vq[i].notification = true;
2072    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2073    vdev->vq[i].inuse = 0;
2074    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2075}
2076
2077void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2078{
2079    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080
2081    if (k->queue_reset) {
2082        k->queue_reset(vdev, queue_index);
2083    }
2084
2085    __virtio_queue_reset(vdev, queue_index);
2086}
2087
2088void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2089{
2090    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2091
2092    /*
2093     * TODO: SeaBIOS is currently out of spec and triggering this error.
2094     * So this needs to be fixed in SeaBIOS, then this can
2095     * be re-enabled for new machine types only, and also after
2096     * being converted to LOG_GUEST_ERROR.
2097     *
2098    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2099        error_report("queue_enable is only supported in devices of virtio "
2100                     "1.0 or later.");
2101    }
2102    */
2103
2104    if (k->queue_enable) {
2105        k->queue_enable(vdev, queue_index);
2106    }
2107}
2108
2109void virtio_reset(void *opaque)
2110{
2111    VirtIODevice *vdev = opaque;
2112    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2113    int i;
2114
2115    virtio_set_status(vdev, 0);
2116    if (current_cpu) {
2117        /* Guest initiated reset */
2118        vdev->device_endian = virtio_current_cpu_endian();
2119    } else {
2120        /* System reset */
2121        vdev->device_endian = virtio_default_endian();
2122    }
2123
2124    if (k->reset) {
2125        k->reset(vdev);
2126    }
2127
2128    vdev->start_on_kick = false;
2129    vdev->started = false;
2130    vdev->broken = false;
2131    vdev->guest_features = 0;
2132    vdev->queue_sel = 0;
2133    vdev->status = 0;
2134    vdev->disabled = false;
2135    qatomic_set(&vdev->isr, 0);
2136    vdev->config_vector = VIRTIO_NO_VECTOR;
2137    virtio_notify_vector(vdev, vdev->config_vector);
2138
2139    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2140        __virtio_queue_reset(vdev, i);
2141    }
2142}
2143
2144void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2145{
2146    if (!vdev->vq[n].vring.num) {
2147        return;
2148    }
2149    vdev->vq[n].vring.desc = addr;
2150    virtio_queue_update_rings(vdev, n);
2151}
2152
2153hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2154{
2155    return vdev->vq[n].vring.desc;
2156}
2157
2158void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2159                            hwaddr avail, hwaddr used)
2160{
2161    if (!vdev->vq[n].vring.num) {
2162        return;
2163    }
2164    vdev->vq[n].vring.desc = desc;
2165    vdev->vq[n].vring.avail = avail;
2166    vdev->vq[n].vring.used = used;
2167    virtio_init_region_cache(vdev, n);
2168}
2169
2170void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2171{
2172    /* Don't allow guest to flip queue between existent and
2173     * nonexistent states, or to set it to an invalid size.
2174     */
2175    if (!!num != !!vdev->vq[n].vring.num ||
2176        num > VIRTQUEUE_MAX_SIZE ||
2177        num < 0) {
2178        return;
2179    }
2180    vdev->vq[n].vring.num = num;
2181}
2182
2183VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2184{
2185    return QLIST_FIRST(&vdev->vector_queues[vector]);
2186}
2187
2188VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2189{
2190    return QLIST_NEXT(vq, node);
2191}
2192
2193int virtio_queue_get_num(VirtIODevice *vdev, int n)
2194{
2195    return vdev->vq[n].vring.num;
2196}
2197
2198int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2199{
2200    return vdev->vq[n].vring.num_default;
2201}
2202
2203int virtio_get_num_queues(VirtIODevice *vdev)
2204{
2205    int i;
2206
2207    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2208        if (!virtio_queue_get_num(vdev, i)) {
2209            break;
2210        }
2211    }
2212
2213    return i;
2214}
2215
2216void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2217{
2218    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2219    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2220
2221    /* virtio-1 compliant devices cannot change the alignment */
2222    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2223        error_report("tried to modify queue alignment for virtio-1 device");
2224        return;
2225    }
2226    /* Check that the transport told us it was going to do this
2227     * (so a buggy transport will immediately assert rather than
2228     * silently failing to migrate this state)
2229     */
2230    assert(k->has_variable_vring_alignment);
2231
2232    if (align) {
2233        vdev->vq[n].vring.align = align;
2234        virtio_queue_update_rings(vdev, n);
2235    }
2236}
2237
2238static void virtio_queue_notify_vq(VirtQueue *vq)
2239{
2240    if (vq->vring.desc && vq->handle_output) {
2241        VirtIODevice *vdev = vq->vdev;
2242
2243        if (unlikely(vdev->broken)) {
2244            return;
2245        }
2246
2247        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2248        vq->handle_output(vdev, vq);
2249
2250        if (unlikely(vdev->start_on_kick)) {
2251            virtio_set_started(vdev, true);
2252        }
2253    }
2254}
2255
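/*
 * Guest kick entry point used by the transports.  With ioeventfd the host
 * notifier is signalled so the handler runs from the AioContext where it is
 * registered; otherwise the handler is invoked synchronously here.
 */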
2256void virtio_queue_notify(VirtIODevice *vdev, int n)
2257{
2258    VirtQueue *vq = &vdev->vq[n];
2259
2260    if (unlikely(!vq->vring.desc || vdev->broken)) {
2261        return;
2262    }
2263
2264    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2265    if (vq->host_notifier_enabled) {
2266        event_notifier_set(&vq->host_notifier);
2267    } else if (vq->handle_output) {
2268        vq->handle_output(vdev, vq);
2269
2270        if (unlikely(vdev->start_on_kick)) {
2271            virtio_set_started(vdev, true);
2272        }
2273    }
2274}
2275
2276uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2277{
2278    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2279        VIRTIO_NO_VECTOR;
2280}
2281
2282void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2283{
2284    VirtQueue *vq = &vdev->vq[n];
2285
2286    if (n < VIRTIO_QUEUE_MAX) {
2287        if (vdev->vector_queues &&
2288            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2289            QLIST_REMOVE(vq, node);
2290        }
2291        vdev->vq[n].vector = vector;
2292        if (vdev->vector_queues &&
2293            vector != VIRTIO_NO_VECTOR) {
2294            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2295        }
2296    }
2297}
2298
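/*
 * Claims the first free virtqueue slot.  Typical use (a sketch; the handler
 * name is made up) is from a device's realize hook:
 *
 *     s->ctrl_vq = virtio_add_queue(vdev, 64, my_handle_ctrl);
 *
 * Aborts if all VIRTIO_QUEUE_MAX slots are taken or queue_size exceeds
 * VIRTQUEUE_MAX_SIZE.
 */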
2299VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2300                            VirtIOHandleOutput handle_output)
2301{
2302    int i;
2303
2304    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2305        if (vdev->vq[i].vring.num == 0)
2306            break;
2307    }
2308
2309    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2310        abort();
2311
2312    vdev->vq[i].vring.num = queue_size;
2313    vdev->vq[i].vring.num_default = queue_size;
2314    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2315    vdev->vq[i].handle_output = handle_output;
2316    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2317
2318    return &vdev->vq[i];
2319}
2320
2321void virtio_delete_queue(VirtQueue *vq)
2322{
2323    vq->vring.num = 0;
2324    vq->vring.num_default = 0;
2325    vq->handle_output = NULL;
2326    g_free(vq->used_elems);
2327    vq->used_elems = NULL;
2328    virtio_virtqueue_reset_region_cache(vq);
2329}
2330
2331void virtio_del_queue(VirtIODevice *vdev, int n)
2332{
2333    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2334        abort();
2335    }
2336
2337    virtio_delete_queue(&vdev->vq[n]);
2338}
2339
2340static void virtio_set_isr(VirtIODevice *vdev, int value)
2341{
2342    uint8_t old = qatomic_read(&vdev->isr);
2343
2344    /* Do not write ISR if it does not change, so that its cacheline remains
2345     * shared in the common case where the guest does not read it.
2346     */
2347    if ((old & value) != value) {
2348        qatomic_or(&vdev->isr, value);
2349    }
2350}
2351
2352/* Called within rcu_read_lock(). */
2353static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2354{
2355    uint16_t old, new;
2356    bool v;
2357    /* We need to expose used array entries before checking used event. */
2358    smp_mb();
2359    /* Always notify when queue is empty, if VIRTIO_F_NOTIFY_ON_EMPTY was acknowledged */
2360    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2361        !vq->inuse && virtio_queue_empty(vq)) {
2362        return true;
2363    }
2364
2365    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2366        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2367    }
2368
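    /*
     * With EVENT_IDX the guest publishes a used_event index: an interrupt
     * is only needed when that index falls within the range of used entries
     * added since the last notification, which is what vring_need_event()
     * checks against old and new.  If signalled_used was not yet valid,
     * notify unconditionally.
     */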
2369    v = vq->signalled_used_valid;
2370    vq->signalled_used_valid = true;
2371    old = vq->signalled_used;
2372    new = vq->signalled_used = vq->used_idx;
2373    return !v || vring_need_event(vring_get_used_event(vq), new, old);
2374}
2375
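/*
 * The driver's event suppression area encodes the event offset in bits 0-14
 * of off_wrap and the expected wrap counter in bit 15.  If that wrap counter
 * differs from ours, the offset refers to the previous pass over the ring,
 * so it is adjusted down by vring.num before applying the same
 * vring_need_event() test used for split rings.
 */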
2376static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2377                                    uint16_t off_wrap, uint16_t new,
2378                                    uint16_t old)
2379{
2380    int off = off_wrap & ~(1 << 15);
2381
2382    if (wrap != off_wrap >> 15) {
2383        off -= vq->vring.num;
2384    }
2385
2386    return vring_need_event(off, new, old);
2387}
2388
2389/* Called within rcu_read_lock(). */
2390static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2391{
2392    VRingPackedDescEvent e;
2393    uint16_t old, new;
2394    bool v;
2395    VRingMemoryRegionCaches *caches;
2396
2397    caches = vring_get_region_caches(vq);
2398    if (!caches) {
2399        return false;
2400    }
2401
2402    vring_packed_event_read(vdev, &caches->avail, &e);
2403
2404    old = vq->signalled_used;
2405    new = vq->signalled_used = vq->used_idx;
2406    v = vq->signalled_used_valid;
2407    vq->signalled_used_valid = true;
2408
2409    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2410        return false;
2411    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2412        return true;
2413    }
2414
2415    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2416                                         e.off_wrap, new, old);
2417}
2418
2419/* Called within rcu_read_lock().  */
2420static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2421{
2422    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2423        return virtio_packed_should_notify(vdev, vq);
2424    } else {
2425        return virtio_split_should_notify(vdev, vq);
2426    }
2427}
2428
2429void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2430{
2431    WITH_RCU_READ_LOCK_GUARD() {
2432        if (!virtio_should_notify(vdev, vq)) {
2433            return;
2434        }
2435    }
2436
2437    trace_virtio_notify_irqfd(vdev, vq);
2438
2439    /*
2440     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2441     * Windows drivers included in virtio-win 1.8.0 (circa 2015) are
2442     * incorrectly polling this bit during crashdump and hibernation
2443     * in MSI mode, causing a hang if this bit is never updated.
2444     * Recent releases of Windows do not really shut down, but rather
2445     * log out and hibernate to make the next startup faster.  Hence,
2446     * this manifested as a more serious hang during shutdown with
2447     *
2448     * The next driver release, from 2016, fixed this problem, so working
2449     * around it is not a must, but it's easy to do, so let's do it here.
2450     *
2451     * Note: it's safe to update ISR from any thread as it was switched
2452     * to an atomic operation.
2453     */
2454    virtio_set_isr(vq->vdev, 0x1);
2455    event_notifier_set(&vq->guest_notifier);
2456}
2457
2458static void virtio_irq(VirtQueue *vq)
2459{
2460    virtio_set_isr(vq->vdev, 0x1);
2461    virtio_notify_vector(vq->vdev, vq->vector);
2462}
2463
2464void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2465{
2466    WITH_RCU_READ_LOCK_GUARD() {
2467        if (!virtio_should_notify(vdev, vq)) {
2468            return;
2469        }
2470    }
2471
2472    trace_virtio_notify(vdev, vq);
2473    virtio_irq(vq);
2474}
2475
2476void virtio_notify_config(VirtIODevice *vdev)
2477{
2478    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2479        return;
2480
2481    virtio_set_isr(vdev, 0x3);
2482    vdev->generation++;
2483    virtio_notify_vector(vdev, vdev->config_vector);
2484}
2485
2486static bool virtio_device_endian_needed(void *opaque)
2487{
2488    VirtIODevice *vdev = opaque;
2489
2490    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2491    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2492        return vdev->device_endian != virtio_default_endian();
2493    }
2494    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2495    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2496}
2497
2498static bool virtio_64bit_features_needed(void *opaque)
2499{
2500    VirtIODevice *vdev = opaque;
2501
2502    return (vdev->host_features >> 32) != 0;
2503}
2504
2505static bool virtio_virtqueue_needed(void *opaque)
2506{
2507    VirtIODevice *vdev = opaque;
2508
2509    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2510}
2511
2512static bool virtio_packed_virtqueue_needed(void *opaque)
2513{
2514    VirtIODevice *vdev = opaque;
2515
2516    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2517}
2518
2519static bool virtio_ringsize_needed(void *opaque)
2520{
2521    VirtIODevice *vdev = opaque;
2522    int i;
2523
2524    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2525        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2526            return true;
2527        }
2528    }
2529    return false;
2530}
2531
2532static bool virtio_extra_state_needed(void *opaque)
2533{
2534    VirtIODevice *vdev = opaque;
2535    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2536    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2537
2538    return k->has_extra_state &&
2539        k->has_extra_state(qbus->parent);
2540}
2541
2542static bool virtio_broken_needed(void *opaque)
2543{
2544    VirtIODevice *vdev = opaque;
2545
2546    return vdev->broken;
2547}
2548
2549static bool virtio_started_needed(void *opaque)
2550{
2551    VirtIODevice *vdev = opaque;
2552
2553    return vdev->started;
2554}
2555
2556static bool virtio_disabled_needed(void *opaque)
2557{
2558    VirtIODevice *vdev = opaque;
2559
2560    return vdev->disabled;
2561}
2562
2563static const VMStateDescription vmstate_virtqueue = {
2564    .name = "virtqueue_state",
2565    .version_id = 1,
2566    .minimum_version_id = 1,
2567    .fields = (VMStateField[]) {
2568        VMSTATE_UINT64(vring.avail, struct VirtQueue),
2569        VMSTATE_UINT64(vring.used, struct VirtQueue),
2570        VMSTATE_END_OF_LIST()
2571    }
2572};
2573
2574static const VMStateDescription vmstate_packed_virtqueue = {
2575    .name = "packed_virtqueue_state",
2576    .version_id = 1,
2577    .minimum_version_id = 1,
2578    .fields = (VMStateField[]) {
2579        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2580        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2581        VMSTATE_UINT16(used_idx, struct VirtQueue),
2582        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2583        VMSTATE_UINT32(inuse, struct VirtQueue),
2584        VMSTATE_END_OF_LIST()
2585    }
2586};
2587
2588static const VMStateDescription vmstate_virtio_virtqueues = {
2589    .name = "virtio/virtqueues",
2590    .version_id = 1,
2591    .minimum_version_id = 1,
2592    .needed = &virtio_virtqueue_needed,
2593    .fields = (VMStateField[]) {
2594        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2595                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2596        VMSTATE_END_OF_LIST()
2597    }
2598};
2599
2600static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2601    .name = "virtio/packed_virtqueues",
2602    .version_id = 1,
2603    .minimum_version_id = 1,
2604    .needed = &virtio_packed_virtqueue_needed,
2605    .fields = (VMStateField[]) {
2606        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2607                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2608        VMSTATE_END_OF_LIST()
2609    }
2610};
2611
2612static const VMStateDescription vmstate_ringsize = {
2613    .name = "ringsize_state",
2614    .version_id = 1,
2615    .minimum_version_id = 1,
2616    .fields = (VMStateField[]) {
2617        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2618        VMSTATE_END_OF_LIST()
2619    }
2620};
2621
2622static const VMStateDescription vmstate_virtio_ringsize = {
2623    .name = "virtio/ringsize",
2624    .version_id = 1,
2625    .minimum_version_id = 1,
2626    .needed = &virtio_ringsize_needed,
2627    .fields = (VMStateField[]) {
2628        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2629                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2630        VMSTATE_END_OF_LIST()
2631    }
2632};
2633
2634static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2635                           const VMStateField *field)
2636{
2637    VirtIODevice *vdev = pv;
2638    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2639    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2640
2641    if (!k->load_extra_state) {
2642        return -1;
2643    } else {
2644        return k->load_extra_state(qbus->parent, f);
2645    }
2646}
2647
2648static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2649                           const VMStateField *field, JSONWriter *vmdesc)
2650{
2651    VirtIODevice *vdev = pv;
2652    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2653    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2654
2655    k->save_extra_state(qbus->parent, f);
2656    return 0;
2657}
2658
2659static const VMStateInfo vmstate_info_extra_state = {
2660    .name = "virtqueue_extra_state",
2661    .get = get_extra_state,
2662    .put = put_extra_state,
2663};
2664
2665static const VMStateDescription vmstate_virtio_extra_state = {
2666    .name = "virtio/extra_state",
2667    .version_id = 1,
2668    .minimum_version_id = 1,
2669    .needed = &virtio_extra_state_needed,
2670    .fields = (VMStateField[]) {
2671        {
2672            .name         = "extra_state",
2673            .version_id   = 0,
2674            .field_exists = NULL,
2675            .size         = 0,
2676            .info         = &vmstate_info_extra_state,
2677            .flags        = VMS_SINGLE,
2678            .offset       = 0,
2679        },
2680        VMSTATE_END_OF_LIST()
2681    }
2682};
2683
2684static const VMStateDescription vmstate_virtio_device_endian = {
2685    .name = "virtio/device_endian",
2686    .version_id = 1,
2687    .minimum_version_id = 1,
2688    .needed = &virtio_device_endian_needed,
2689    .fields = (VMStateField[]) {
2690        VMSTATE_UINT8(device_endian, VirtIODevice),
2691        VMSTATE_END_OF_LIST()
2692    }
2693};
2694
2695static const VMStateDescription vmstate_virtio_64bit_features = {
2696    .name = "virtio/64bit_features",
2697    .version_id = 1,
2698    .minimum_version_id = 1,
2699    .needed = &virtio_64bit_features_needed,
2700    .fields = (VMStateField[]) {
2701        VMSTATE_UINT64(guest_features, VirtIODevice),
2702        VMSTATE_END_OF_LIST()
2703    }
2704};
2705
2706static const VMStateDescription vmstate_virtio_broken = {
2707    .name = "virtio/broken",
2708    .version_id = 1,
2709    .minimum_version_id = 1,
2710    .needed = &virtio_broken_needed,
2711    .fields = (VMStateField[]) {
2712        VMSTATE_BOOL(broken, VirtIODevice),
2713        VMSTATE_END_OF_LIST()
2714    }
2715};
2716
2717static const VMStateDescription vmstate_virtio_started = {
2718    .name = "virtio/started",
2719    .version_id = 1,
2720    .minimum_version_id = 1,
2721    .needed = &virtio_started_needed,
2722    .fields = (VMStateField[]) {
2723        VMSTATE_BOOL(started, VirtIODevice),
2724        VMSTATE_END_OF_LIST()
2725    }
2726};
2727
2728static const VMStateDescription vmstate_virtio_disabled = {
2729    .name = "virtio/disabled",
2730    .version_id = 1,
2731    .minimum_version_id = 1,
2732    .needed = &virtio_disabled_needed,
2733    .fields = (VMStateField[]) {
2734        VMSTATE_BOOL(disabled, VirtIODevice),
2735        VMSTATE_END_OF_LIST()
2736    }
2737};
2738
2739static const VMStateDescription vmstate_virtio = {
2740    .name = "virtio",
2741    .version_id = 1,
2742    .minimum_version_id = 1,
2743    .fields = (VMStateField[]) {
2744        VMSTATE_END_OF_LIST()
2745    },
2746    .subsections = (const VMStateDescription*[]) {
2747        &vmstate_virtio_device_endian,
2748        &vmstate_virtio_64bit_features,
2749        &vmstate_virtio_virtqueues,
2750        &vmstate_virtio_ringsize,
2751        &vmstate_virtio_broken,
2752        &vmstate_virtio_extra_state,
2753        &vmstate_virtio_started,
2754        &vmstate_virtio_packed_virtqueues,
2755        &vmstate_virtio_disabled,
2756        NULL
2757    }
2758};
2759
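/*
 * Legacy stream layout, in outline: transport config (if any), then status,
 * isr and queue_sel, the low 32 bits of the guest features, the device
 * config space, the number of in-use virtqueues, and for each queue its
 * size, optional alignment, desc address and last_avail_idx plus any
 * transport per-queue state.  Device-specific state (save hook or vmsd) and
 * the vmstate_virtio subsections above follow at the end.
 */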
2760int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2761{
2762    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2763    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2764    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2765    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2766    int i;
2767
2768    if (k->save_config) {
2769        k->save_config(qbus->parent, f);
2770    }
2771
2772    qemu_put_8s(f, &vdev->status);
2773    qemu_put_8s(f, &vdev->isr);
2774    qemu_put_be16s(f, &vdev->queue_sel);
2775    qemu_put_be32s(f, &guest_features_lo);
2776    qemu_put_be32(f, vdev->config_len);
2777    qemu_put_buffer(f, vdev->config, vdev->config_len);
2778
2779    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2780        if (vdev->vq[i].vring.num == 0)
2781            break;
2782    }
2783
2784    qemu_put_be32(f, i);
2785
2786    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2787        if (vdev->vq[i].vring.num == 0)
2788            break;
2789
2790        qemu_put_be32(f, vdev->vq[i].vring.num);
2791        if (k->has_variable_vring_alignment) {
2792            qemu_put_be32(f, vdev->vq[i].vring.align);
2793        }
2794        /*
2795         * Save desc now; the rest of the ring addresses are saved in
2796         * subsections for VIRTIO-1 devices.
2797         */
2798        qemu_put_be64(f, vdev->vq[i].vring.desc);
2799        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2800        if (k->save_queue) {
2801            k->save_queue(qbus->parent, i, f);
2802        }
2803    }
2804
2805    if (vdc->save != NULL) {
2806        vdc->save(vdev, f);
2807    }
2808
2809    if (vdc->vmsd) {
2810        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2811        if (ret) {
2812            return ret;
2813        }
2814    }
2815
2816    /* Subsections */
2817    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2818}
2819
2820/* A wrapper for use as a VMState .put function */
2821static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2822                              const VMStateField *field, JSONWriter *vmdesc)
2823{
2824    return virtio_save(VIRTIO_DEVICE(opaque), f);
2825}
2826
2827/* A wrapper for use as a VMState .get function */
2828static int coroutine_mixed_fn
2829virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2830                  const VMStateField *field)
2831{
2832    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2833    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2834
2835    return virtio_load(vdev, f, dc->vmsd->version_id);
2836}
2837
2838const VMStateInfo  virtio_vmstate_info = {
2839    .name = "virtio",
2840    .get = virtio_device_get,
2841    .put = virtio_device_put,
2842};
2843
2844static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2845{
2846    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2847    bool bad = (val & ~(vdev->host_features)) != 0;
2848
2849    val &= vdev->host_features;
2850    if (k->set_features) {
2851        k->set_features(vdev, val);
2852    }
2853    vdev->guest_features = val;
2854    return bad ? -1 : 0;
2855}
2856
2857typedef struct VirtioSetFeaturesNocheckData {
2858    Coroutine *co;
2859    VirtIODevice *vdev;
2860    uint64_t val;
2861    int ret;
2862} VirtioSetFeaturesNocheckData;
2863
2864static void virtio_set_features_nocheck_bh(void *opaque)
2865{
2866    VirtioSetFeaturesNocheckData *data = opaque;
2867
2868    data->ret = virtio_set_features_nocheck(data->vdev, data->val);
2869    aio_co_wake(data->co);
2870}
2871
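/*
 * Feature setting can be reached from coroutine context during virtio_load.
 * In that case the call is bounced to a one-shot bottom half on the current
 * AioContext and the coroutine yields until the BH wakes it, so the
 * set_features callbacks always run outside coroutine context.
 */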
2872static int coroutine_mixed_fn
2873virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
2874{
2875    if (qemu_in_coroutine()) {
2876        VirtioSetFeaturesNocheckData data = {
2877            .co = qemu_coroutine_self(),
2878            .vdev = vdev,
2879            .val = val,
2880        };
2881        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
2882                                virtio_set_features_nocheck_bh, &data);
2883        qemu_coroutine_yield();
2884        return data.ret;
2885    } else {
2886        return virtio_set_features_nocheck(vdev, val);
2887    }
2888}
2889
2890int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2891{
2892    int ret;
2893    /*
2894     * The driver must not attempt to set features after feature negotiation
2895     * has finished.
2896     */
2897    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2898        return -EINVAL;
2899    }
2900
2901    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
2902        qemu_log_mask(LOG_GUEST_ERROR,
2903                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
2904                      __func__, vdev->name);
2905    }
2906
2907    ret = virtio_set_features_nocheck(vdev, val);
2908    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2909        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2910        int i;
2911        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2912            if (vdev->vq[i].vring.num != 0) {
2913                virtio_init_region_cache(vdev, i);
2914            }
2915        }
2916    }
2917    if (!ret) {
2918        if (!virtio_device_started(vdev, vdev->status) &&
2919            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2920            vdev->start_on_kick = true;
2921        }
2922    }
2923    return ret;
2924}
2925
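/*
 * Illustrative use only (FEATURE_X and the sizes below are made up): a
 * device with a 16-byte base config and an extra field ending at offset 24,
 * present only when FEATURE_X is offered, could describe itself as
 *
 *     static const VirtIOFeature sizes[] = {
 *         { .flags = 1ULL << FEATURE_X, .end = 24 },
 *         {}
 *     };
 *     static const VirtIOConfigSizeParams cfg = {
 *         .min_size = 16, .max_size = 24, .feature_sizes = sizes,
 *     };
 *
 * virtio_get_config_size(&cfg, host_features) then yields 16 or 24 depending
 * on the offered features.
 */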
2926size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
2927                              uint64_t host_features)
2928{
2929    size_t config_size = params->min_size;
2930    const VirtIOFeature *feature_sizes = params->feature_sizes;
2931    size_t i;
2932
2933    for (i = 0; feature_sizes[i].flags != 0; i++) {
2934        if (host_features & feature_sizes[i].flags) {
2935            config_size = MAX(feature_sizes[i].end, config_size);
2936        }
2937    }
2938
2939    assert(config_size <= params->max_size);
2940    return config_size;
2941}
2942
2943int coroutine_mixed_fn
2944virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2945{
2946    int i, ret;
2947    int32_t config_len;
2948    uint32_t num;
2949    uint32_t features;
2950    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2951    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2952    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2953
2954    /*
2955     * We poison the endianness to ensure it does not get used before
2956     * subsections have been loaded.
2957     */
2958    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
2959
2960    if (k->load_config) {
2961        ret = k->load_config(qbus->parent, f);
2962        if (ret)
2963            return ret;
2964    }
2965
2966    qemu_get_8s(f, &vdev->status);
2967    qemu_get_8s(f, &vdev->isr);
2968    qemu_get_be16s(f, &vdev->queue_sel);
2969    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
2970        return -1;
2971    }
2972    qemu_get_be32s(f, &features);
2973
2974    /*
2975     * Temporarily set the low guest_features bits - needed by the
2976     * virtio-net load code, which tests for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
2977     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
2978     *
2979     * Note: devices should always test host features in the future - don't create
2980     * new dependencies like this.
2981     */
2982    vdev->guest_features = features;
2983
2984    config_len = qemu_get_be32(f);
2985
2986    /*
2987     * There are cases where the incoming config can be bigger or smaller
2988     * than what we have; so load what we have space for, and skip
2989     * any excess that's in the stream.
2990     */
2991    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2992
2993    while (config_len > vdev->config_len) {
2994        qemu_get_byte(f);
2995        config_len--;
2996    }
2997
2998    num = qemu_get_be32(f);
2999
3000    if (num > VIRTIO_QUEUE_MAX) {
3001        error_report("Invalid number of virtqueues: 0x%x", num);
3002        return -1;
3003    }
3004
3005    for (i = 0; i < num; i++) {
3006        vdev->vq[i].vring.num = qemu_get_be32(f);
3007        if (k->has_variable_vring_alignment) {
3008            vdev->vq[i].vring.align = qemu_get_be32(f);
3009        }
3010        vdev->vq[i].vring.desc = qemu_get_be64(f);
3011        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3012        vdev->vq[i].signalled_used_valid = false;
3013        vdev->vq[i].notification = true;
3014
3015        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3016            error_report("VQ %d address 0x0 "
3017                         "inconsistent with Host index 0x%x",
3018                         i, vdev->vq[i].last_avail_idx);
3019            return -1;
3020        }
3021        if (k->load_queue) {
3022            ret = k->load_queue(qbus->parent, i, f);
3023            if (ret)
3024                return ret;
3025        }
3026    }
3027
3028    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3029
3030    if (vdc->load != NULL) {
3031        ret = vdc->load(vdev, f, version_id);
3032        if (ret) {
3033            return ret;
3034        }
3035    }
3036
3037    if (vdc->vmsd) {
3038        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3039        if (ret) {
3040            return ret;
3041        }
3042    }
3043
3044    /* Subsections */
3045    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3046    if (ret) {
3047        return ret;
3048    }
3049
3050    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3051        vdev->device_endian = virtio_default_endian();
3052    }
3053
3054    if (virtio_64bit_features_needed(vdev)) {
3055        /*
3056         * Subsection load filled vdev->guest_features.  Run them through
3057         * virtio_set_features_nocheck_maybe_co() to sanity-check them
3058         * against host_features.
3059         */
3060        uint64_t features64 = vdev->guest_features;
3061        if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
3062            error_report("Features 0x%" PRIx64 " unsupported. "
3063                         "Allowed features: 0x%" PRIx64,
3064                         features64, vdev->host_features);
3065            return -1;
3066        }
3067    } else {
3068        if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
3069            error_report("Features 0x%x unsupported. "
3070                         "Allowed features: 0x%" PRIx64,
3071                         features, vdev->host_features);
3072            return -1;
3073        }
3074    }
3075
3076    if (!virtio_device_started(vdev, vdev->status) &&
3077        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3078        vdev->start_on_kick = true;
3079    }
3080
3081    RCU_READ_LOCK_GUARD();
3082    for (i = 0; i < num; i++) {
3083        if (vdev->vq[i].vring.desc) {
3084            uint16_t nheads;
3085
3086            /*
3087             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3088             * only the region cache needs to be set up.  Legacy devices need
3089             * to calculate used and avail ring addresses based on the desc
3090             * address.
3091             */
3092            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3093                virtio_init_region_cache(vdev, i);
3094            } else {
3095                virtio_queue_update_rings(vdev, i);
3096            }
3097
3098            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3099                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3100                vdev->vq[i].shadow_avail_wrap_counter =
3101                                        vdev->vq[i].last_avail_wrap_counter;
3102                continue;
3103            }
3104
3105            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3106            /* Check it isn't doing strange things with descriptor numbers. */
3107            if (nheads > vdev->vq[i].vring.num) {
3108                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3109                             "inconsistent with Host index 0x%x: delta 0x%x",
3110                             i, vdev->vq[i].vring.num,
3111                             vring_avail_idx(&vdev->vq[i]),
3112                             vdev->vq[i].last_avail_idx, nheads);
3113                vdev->vq[i].used_idx = 0;
3114                vdev->vq[i].shadow_avail_idx = 0;
3115                vdev->vq[i].inuse = 0;
3116                continue;
3117            }
3118            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3119            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3120
3121            /*
3122             * Some devices migrate VirtQueueElements that have been popped
3123             * from the avail ring but not yet returned to the used ring.
3124             * Since max ring size < UINT16_MAX it's safe to use modulo
3125             * UINT16_MAX + 1 subtraction.
3126             */
3127            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3128                                vdev->vq[i].used_idx);
3129            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3130                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3131                             "used_idx 0x%x",
3132                             i, vdev->vq[i].vring.num,
3133                             vdev->vq[i].last_avail_idx,
3134                             vdev->vq[i].used_idx);
3135                return -1;
3136            }
3137        }
3138    }
3139
3140    if (vdc->post_load) {
3141        ret = vdc->post_load(vdev);
3142        if (ret) {
3143            return ret;
3144        }
3145    }
3146
3147    return 0;
3148}
3149
3150void virtio_cleanup(VirtIODevice *vdev)
3151{
3152    qemu_del_vm_change_state_handler(vdev->vmstate);
3153}
3154
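/*
 * VM run-state hook: when the machine resumes with a started device, the
 * saved status is pushed to the device before the transport's
 * vmstate_change callback runs; when stopping, the transport is notified
 * first and the status write happens afterwards.
 */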
3155static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3156{
3157    VirtIODevice *vdev = opaque;
3158    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3159    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3160    bool backend_run = running && virtio_device_started(vdev, vdev->status);
3161    vdev->vm_running = running;
3162
3163    if (backend_run) {
3164        virtio_set_status(vdev, vdev->status);
3165    }
3166
3167    if (k->vmstate_change) {
3168        k->vmstate_change(qbus->parent, backend_run);
3169    }
3170
3171    if (!backend_run) {
3172        virtio_set_status(vdev, vdev->status);
3173    }
3174}
3175
3176void virtio_instance_init_common(Object *proxy_obj, void *data,
3177                                 size_t vdev_size, const char *vdev_name)
3178{
3179    DeviceState *vdev = data;
3180
3181    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3182                                       vdev_size, vdev_name, &error_abort,
3183                                       NULL);
3184    qdev_alias_all_properties(vdev, proxy_obj);
3185}
3186
3187void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3188{
3189    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3190    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3191    int i;
3192    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3193
3194    if (nvectors) {
3195        vdev->vector_queues =
3196            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3197    }
3198
3199    vdev->start_on_kick = false;
3200    vdev->started = false;
3201    vdev->vhost_started = false;
3202    vdev->device_id = device_id;
3203    vdev->status = 0;
3204    qatomic_set(&vdev->isr, 0);
3205    vdev->queue_sel = 0;
3206    vdev->config_vector = VIRTIO_NO_VECTOR;
3207    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3208    vdev->vm_running = runstate_is_running();
3209    vdev->broken = false;
3210    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3211        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3212        vdev->vq[i].vdev = vdev;
3213        vdev->vq[i].queue_index = i;
3214        vdev->vq[i].host_notifier_enabled = false;
3215    }
3216
3217    vdev->name = virtio_id_to_name(device_id);
3218    vdev->config_len = config_size;
3219    if (vdev->config_len) {
3220        vdev->config = g_malloc0(config_size);
3221    } else {
3222        vdev->config = NULL;
3223    }
3224    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3225            virtio_vmstate_change, vdev);
3226    vdev->device_endian = virtio_default_endian();
3227    vdev->use_guest_notifier_mask = true;
3228}
3229
3230/*
3231 * Only devices that existed before the virtio standard was defined can
3232 * support legacy mode; this includes devices not specified in the
3233 * standard.  All newer devices conform to the virtio standard only.
3234 */
3235bool virtio_legacy_allowed(VirtIODevice *vdev)
3236{
3237    switch (vdev->device_id) {
3238    case VIRTIO_ID_NET:
3239    case VIRTIO_ID_BLOCK:
3240    case VIRTIO_ID_CONSOLE:
3241    case VIRTIO_ID_RNG:
3242    case VIRTIO_ID_BALLOON:
3243    case VIRTIO_ID_RPMSG:
3244    case VIRTIO_ID_SCSI:
3245    case VIRTIO_ID_9P:
3246    case VIRTIO_ID_RPROC_SERIAL:
3247    case VIRTIO_ID_CAIF:
3248        return true;
3249    default:
3250        return false;
3251    }
3252}
3253
3254bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3255{
3256    return vdev->disable_legacy_check;
3257}
3258
3259hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3260{
3261    return vdev->vq[n].vring.desc;
3262}
3263
3264bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3265{
3266    return virtio_queue_get_desc_addr(vdev, n) != 0;
3267}
3268
3269bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3270{
3271    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3272    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3273
3274    if (k->queue_enabled) {
3275        return k->queue_enabled(qbus->parent, n);
3276    }
3277    return virtio_queue_enabled_legacy(vdev, n);
3278}
3279
3280hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3281{
3282    return vdev->vq[n].vring.avail;
3283}
3284
3285hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3286{
3287    return vdev->vq[n].vring.used;
3288}
3289
3290hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3291{
3292    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3293}
3294
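/*
 * For split rings the avail area is 4 bytes of flags+idx plus 2 bytes per
 * ring entry, plus 2 more bytes for used_event when VIRTIO_RING_F_EVENT_IDX
 * was negotiated: e.g. a 256-entry queue needs 4 + 512 + 2 = 518 bytes.
 * Packed rings only expose the driver event suppression structure here.
 */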
3295hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3296{
3297    int s;
3298
3299    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3300        return sizeof(struct VRingPackedDescEvent);
3301    }
3302
3303    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3304    return offsetof(VRingAvail, ring) +
3305        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3306}
3307
3308hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3309{
3310    int s;
3311
3312    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3313        return sizeof(struct VRingPackedDescEvent);
3314    }
3315
3316    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3317    return offsetof(VRingUsed, ring) +
3318        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3319}
3320
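/*
 * For packed queues the "last avail" value exchanged with the transport
 * packs both progress counters into 32 bits: bits 0-14 hold last_avail_idx,
 * bit 15 the avail wrap counter, bits 16-30 used_idx and bit 31 the used
 * wrap counter.  virtio_queue_packed_set_last_avail_idx() decodes the same
 * layout.
 */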
3321static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3322                                                           int n)
3323{
3324    unsigned int avail, used;
3325
3326    avail = vdev->vq[n].last_avail_idx;
3327    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3328
3329    used = vdev->vq[n].used_idx;
3330    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3331
3332    return avail | used << 16;
3333}
3334
3335static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3336                                                      int n)
3337{
3338    return vdev->vq[n].last_avail_idx;
3339}
3340
3341unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3342{
3343    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3344        return virtio_queue_packed_get_last_avail_idx(vdev, n);
3345    } else {
3346        return virtio_queue_split_get_last_avail_idx(vdev, n);
3347    }
3348}
3349
3350static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3351                                                   int n, unsigned int idx)
3352{
3353    struct VirtQueue *vq = &vdev->vq[n];
3354
3355    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3356    vq->last_avail_wrap_counter =
3357        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3358    idx >>= 16;
3359    vq->used_idx = idx & 0x7fff;
3360    vq->used_wrap_counter = !!(idx & 0x8000);
3361}
3362
3363static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3364                                                  int n, unsigned int idx)
3365{
3366    vdev->vq[n].last_avail_idx = idx;
3367    vdev->vq[n].shadow_avail_idx = idx;
3368}
3369
3370void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3371                                     unsigned int idx)
3372{
3373    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3374        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3375    } else {
3376        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3377    }
3378}
3379
3380static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3381                                                       int n)
3382{
3383    /* We don't have a reference like avail idx in shared memory */
3384    return;
3385}
3386
3387static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3388                                                      int n)
3389{
3390    RCU_READ_LOCK_GUARD();
3391    if (vdev->vq[n].vring.desc) {
3392        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3393        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3394    }
3395}
3396
3397void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3398{
3399    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3400        virtio_queue_packed_restore_last_avail_idx(vdev, n);
3401    } else {
3402        virtio_queue_split_restore_last_avail_idx(vdev, n);
3403    }
3404}
3405
3406static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3407{
3408    /* used idx was updated through set_last_avail_idx() */
3409    return;
3410}
3411
3412static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3413{
3414    RCU_READ_LOCK_GUARD();
3415    if (vdev->vq[n].vring.desc) {
3416        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3417    }
3418}
3419
3420void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3421{
3422    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3423        virtio_queue_packed_update_used_idx(vdev, n);
3424    } else {
3425        virtio_queue_split_update_used_idx(vdev, n);
3426    }
3427}
3428
3429void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3430{
3431    vdev->vq[n].signalled_used_valid = false;
3432}
3433
3434VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3435{
3436    return vdev->vq + n;
3437}
3438
3439uint16_t virtio_get_queue_index(VirtQueue *vq)
3440{
3441    return vq->queue_index;
3442}
3443
3444static void virtio_queue_guest_notifier_read(EventNotifier *n)
3445{
3446    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3447    if (event_notifier_test_and_clear(n)) {
3448        virtio_irq(vq);
3449    }
3450}

3451static void virtio_config_guest_notifier_read(EventNotifier *n)
3452{
3453    VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3454
3455    if (event_notifier_test_and_clear(n)) {
3456        virtio_notify_config(vdev);
3457    }
3458}

3459void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3460                                                bool with_irqfd)
3461{
3462    if (assign && !with_irqfd) {
3463        event_notifier_set_handler(&vq->guest_notifier,
3464                                   virtio_queue_guest_notifier_read);
3465    } else {
3466        event_notifier_set_handler(&vq->guest_notifier, NULL);
3467    }
3468    if (!assign) {
3469        /* Test and clear notifier before closing it,
3470         * in case poll callback didn't have time to run. */
3471        virtio_queue_guest_notifier_read(&vq->guest_notifier);
3472    }
3473}
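
/*
 * Sketch of the two modes handled above (for illustration): with an irqfd
 * the interrupt is injected from the eventfd in the kernel, so no userspace
 * handler is installed; without an irqfd QEMU must consume the eventfd and
 * raise the interrupt itself:
 *
 *     virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
 *       -> event_notifier_set_handler(&vq->guest_notifier,
 *                                     virtio_queue_guest_notifier_read);
 *       -> each signal ends up in virtio_irq(vq)
 */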
3474
3475void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3476                                                 bool assign, bool with_irqfd)
3477{
3478    EventNotifier *n;
3479    n = &vdev->config_notifier;
3480    if (assign && !with_irqfd) {
3481        event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3482    } else {
3483        event_notifier_set_handler(n, NULL);
3484    }
3485    if (!assign) {
3486        /* Test and clear notifier before closing it,
3487         * in case poll callback didn't have time to run. */
3488        virtio_config_guest_notifier_read(n);
3489    }
3490}
3491
3492EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3493{
3494    return &vq->guest_notifier;
3495}
3496
3497static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3498{
3499    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3500
3501    virtio_queue_set_notification(vq, 0);
3502}
3503
3504static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3505{
3506    EventNotifier *n = opaque;
3507    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3508
3509    return vq->vring.desc && !virtio_queue_empty(vq);
3510}
3511
3512static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3513{
3514    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3515
3516    virtio_queue_notify_vq(vq);
3517}
3518
3519static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3520{
3521    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3522
3523    /* Caller polls once more after this to catch requests that race with us */
3524    virtio_queue_set_notification(vq, 1);
3525}
3526
3527void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3528{
3529    aio_set_event_notifier(ctx, &vq->host_notifier,
3530                           virtio_queue_host_notifier_read,
3531                           virtio_queue_host_notifier_aio_poll,
3532                           virtio_queue_host_notifier_aio_poll_ready);
3533    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3534                                virtio_queue_host_notifier_aio_poll_begin,
3535                                virtio_queue_host_notifier_aio_poll_end);
3536}
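
/*
 * Polling lifecycle wired up above (illustrative): while the AioContext is
 * busy-polling, guest notifications are suppressed and only re-enabled once
 * polling stops, so kicks are needed only for requests that race with the
 * end of polling:
 *
 *     poll begin -> virtio_queue_set_notification(vq, 0)
 *     poll       -> virtio_queue_host_notifier_aio_poll()   ring non-empty?
 *     poll ready -> virtio_queue_notify_vq(vq)              process requests
 *     poll end   -> virtio_queue_set_notification(vq, 1)
 */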
3537
3538/*
3539 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3540 * this for rx virtqueues and similar cases where the virtqueue handler
3541 * function does not pop all elements. When the virtqueue is left non-empty
3542 * polling consumes CPU cycles and should not be used.
3543 */
3544void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3545{
3546    aio_set_event_notifier(ctx, &vq->host_notifier,
3547                           virtio_queue_host_notifier_read,
3548                           NULL, NULL);
3549}
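
/*
 * Usage sketch (hypothetical queue names, for illustration only): a device
 * whose receive queue normally stays non-empty would attach it without
 * polling, while a transmit-style queue that drains completely can poll:
 *
 *     virtio_queue_aio_attach_host_notifier_no_poll(rx_vq, ctx);
 *     virtio_queue_aio_attach_host_notifier(tx_vq, ctx);
 */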
3550
3551void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3552{
3553    aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
3554}
3555
3556void virtio_queue_host_notifier_read(EventNotifier *n)
3557{
3558    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3559    if (event_notifier_test_and_clear(n)) {
3560        virtio_queue_notify_vq(vq);
3561    }
3562}
3563
3564EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3565{
3566    return &vq->host_notifier;
3567}
3568
3569EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3570{
3571    return &vdev->config_notifier;
3572}
3573
3574void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3575{
3576    vq->host_notifier_enabled = enabled;
3577}
3578
3579int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3580                                      MemoryRegion *mr, bool assign)
3581{
3582    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3583    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3584
3585    if (k->set_host_notifier_mr) {
3586        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3587    }
3588
3589    return -1;
3590}
3591
3592void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3593{
3594    g_free(vdev->bus_name);
3595    vdev->bus_name = g_strdup(bus_name);
3596}
3597
3598void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3599{
3600    va_list ap;
3601
3602    va_start(ap, fmt);
3603    error_vreport(fmt, ap);
3604    va_end(ap);
3605
3606    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3607        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
3608        virtio_notify_config(vdev);
3609    }
3610
3611    vdev->broken = true;
3612}
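
/*
 * Usage sketch (illustrative): device models call this when the guest hands
 * them something they cannot parse, e.g.
 *
 *     if (elem->out_num < 1) {
 *         virtio_error(vdev, "%s: missing request header", vdev->name);
 *         return;
 *     }
 *
 * For VIRTIO_F_VERSION_1 devices this also sets NEEDS_RESET and raises a
 * config interrupt; legacy devices are merely marked broken.
 */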
3613
3614static void virtio_memory_listener_commit(MemoryListener *listener)
3615{
3616    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3617    int i;
3618
3619    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3620        if (vdev->vq[i].vring.num == 0) {
3621            break;
3622        }
3623        virtio_init_region_cache(vdev, i);
3624    }
3625}
3626
3627static void virtio_device_realize(DeviceState *dev, Error **errp)
3628{
3629    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3630    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3631    Error *err = NULL;
3632
3633    /* Devices should either use vmsd or the load/save methods */
3634    assert(!vdc->vmsd || !vdc->load);
3635
3636    if (vdc->realize != NULL) {
3637        vdc->realize(dev, &err);
3638        if (err != NULL) {
3639            error_propagate(errp, err);
3640            return;
3641        }
3642    }
3643
3644    virtio_bus_device_plugged(vdev, &err);
3645    if (err != NULL) {
3646        error_propagate(errp, err);
3647        vdc->unrealize(dev);
3648        return;
3649    }
3650
3651    vdev->listener.commit = virtio_memory_listener_commit;
3652    vdev->listener.name = "virtio";
3653    memory_listener_register(&vdev->listener, vdev->dma_as);
3654    QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
3655}
3656
3657static void virtio_device_unrealize(DeviceState *dev)
3658{
3659    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3660    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3661
3662    memory_listener_unregister(&vdev->listener);
3663    virtio_bus_device_unplugged(vdev);
3664
3665    if (vdc->unrealize != NULL) {
3666        vdc->unrealize(dev);
3667    }
3668
3669    QTAILQ_REMOVE(&virtio_list, vdev, next);
3670    g_free(vdev->bus_name);
3671    vdev->bus_name = NULL;
3672}
3673
3674static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3675{
3676    int i;
3677    if (!vdev->vq) {
3678        return;
3679    }
3680
3681    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3682        if (vdev->vq[i].vring.num == 0) {
3683            break;
3684        }
3685        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3686    }
3687    g_free(vdev->vq);
3688}
3689
3690static void virtio_device_instance_finalize(Object *obj)
3691{
3692    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3693
3694    virtio_device_free_virtqueues(vdev);
3695
3696    g_free(vdev->config);
3697    g_free(vdev->vector_queues);
3698}
3699
3700static Property virtio_properties[] = {
3701    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3702    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3703    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3704    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3705                     disable_legacy_check, false),
3706    DEFINE_PROP_END_OF_LIST(),
3707};
3708
3709static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3710{
3711    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3712    int i, n, r, err;
3713
3714    /*
3715     * Batch all the host notifiers in a single transaction to avoid
3716     * quadratic time complexity in address_space_update_ioeventfds().
3717     */
3718    memory_region_transaction_begin();
3719    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3720        VirtQueue *vq = &vdev->vq[n];
3721        if (!virtio_queue_get_num(vdev, n)) {
3722            continue;
3723        }
3724        r = virtio_bus_set_host_notifier(qbus, n, true);
3725        if (r < 0) {
3726            err = r;
3727            goto assign_error;
3728        }
3729        event_notifier_set_handler(&vq->host_notifier,
3730                                   virtio_queue_host_notifier_read);
3731    }
3732
3733    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3734        /* Kick right away to begin processing requests already in vring */
3735        VirtQueue *vq = &vdev->vq[n];
3736        if (!vq->vring.num) {
3737            continue;
3738        }
3739        event_notifier_set(&vq->host_notifier);
3740    }
3741    memory_region_transaction_commit();
3742    return 0;
3743
3744assign_error:
3745    i = n; /* save n for a second iteration after transaction is committed. */
3746    while (--n >= 0) {
3747        VirtQueue *vq = &vdev->vq[n];
3748        if (!virtio_queue_get_num(vdev, n)) {
3749            continue;
3750        }
3751
3752        event_notifier_set_handler(&vq->host_notifier, NULL);
3753        r = virtio_bus_set_host_notifier(qbus, n, false);
3754        assert(r >= 0);
3755    }
3756    /*
3757     * The transaction expects the ioeventfds to be open when it
3758     * commits. Do it now, before the cleanup loop.
3759     */
3760    memory_region_transaction_commit();
3761
3762    while (--i >= 0) {
3763        if (!virtio_queue_get_num(vdev, i)) {
3764            continue;
3765        }
3766        virtio_bus_cleanup_host_notifier(qbus, i);
3767    }
3768    return err;
3769}
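
/*
 * Note on the unwind path above (for illustration): when assignment of
 * queue n's notifier fails, the failing index is saved in 'i' and 'n' is
 * walked back to unassign the notifiers that were set up, still inside the
 * memory transaction.  The eventfds are only closed via
 * virtio_bus_cleanup_host_notifier() after the commit, because the
 * transaction expects them to still be open when it commits.
 */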
3770
3771int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3772{
3773    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3774    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3775
3776    return virtio_bus_start_ioeventfd(vbus);
3777}
3778
3779static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3780{
3781    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3782    int n, r;
3783
3784    /*
3785     * Batch all the host notifiers in a single transaction to avoid
3786     * quadratic time complexity in address_space_update_ioeventfds().
3787     */
3788    memory_region_transaction_begin();
3789    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3790        VirtQueue *vq = &vdev->vq[n];
3791
3792        if (!virtio_queue_get_num(vdev, n)) {
3793            continue;
3794        }
3795        event_notifier_set_handler(&vq->host_notifier, NULL);
3796        r = virtio_bus_set_host_notifier(qbus, n, false);
3797        assert(r >= 0);
3798    }
3799    /*
3800     * The transaction expects the ioeventfds to be open when it
3801     * commits. Do it now, before the cleanup loop.
3802     */
3803    memory_region_transaction_commit();
3804
3805    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3806        if (!virtio_queue_get_num(vdev, n)) {
3807            continue;
3808        }
3809        virtio_bus_cleanup_host_notifier(qbus, n);
3810    }
3811}
3812
3813int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3814{
3815    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3816    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3817
3818    return virtio_bus_grab_ioeventfd(vbus);
3819}
3820
3821void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3822{
3823    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3824    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3825
3826    virtio_bus_release_ioeventfd(vbus);
3827}
3828
3829static void virtio_device_class_init(ObjectClass *klass, void *data)
3830{
3831    /* Set the default value here. */
3832    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3833    DeviceClass *dc = DEVICE_CLASS(klass);
3834
3835    dc->realize = virtio_device_realize;
3836    dc->unrealize = virtio_device_unrealize;
3837    dc->bus_type = TYPE_VIRTIO_BUS;
3838    device_class_set_props(dc, virtio_properties);
3839    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3840    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3841
3842    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3843
3844    QTAILQ_INIT(&virtio_list);
3845}
3846
3847bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3848{
3849    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3850    VirtioBusState *vbus = VIRTIO_BUS(qbus);
3851
3852    return virtio_bus_ioeventfd_enabled(vbus);
3853}
3854
3855VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
3856                                                 uint16_t queue,
3857                                                 Error **errp)
3858{
3859    VirtIODevice *vdev;
3860    VirtQueueStatus *status;
3861
3862    vdev = qmp_find_virtio_device(path);
3863    if (vdev == NULL) {
3864        error_setg(errp, "Path %s is not a VirtIODevice", path);
3865        return NULL;
3866    }
3867
3868    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3869        error_setg(errp, "Invalid virtqueue number %d", queue);
3870        return NULL;
3871    }
3872
3873    status = g_new0(VirtQueueStatus, 1);
3874    status->name = g_strdup(vdev->name);
3875    status->queue_index = vdev->vq[queue].queue_index;
3876    status->inuse = vdev->vq[queue].inuse;
3877    status->vring_num = vdev->vq[queue].vring.num;
3878    status->vring_num_default = vdev->vq[queue].vring.num_default;
3879    status->vring_align = vdev->vq[queue].vring.align;
3880    status->vring_desc = vdev->vq[queue].vring.desc;
3881    status->vring_avail = vdev->vq[queue].vring.avail;
3882    status->vring_used = vdev->vq[queue].vring.used;
3883    status->used_idx = vdev->vq[queue].used_idx;
3884    status->signalled_used = vdev->vq[queue].signalled_used;
3885    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
3886
3887    if (vdev->vhost_started) {
3888        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3889        struct vhost_dev *hdev = vdc->get_vhost(vdev);
3890
3891        /* Check if the vq index exists for vhost as well */
3892        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
3893            status->has_last_avail_idx = true;
3894
3895            int vhost_vq_index =
3896                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
3897            struct vhost_vring_state state = {
3898                .index = vhost_vq_index,
3899            };
3900
3901            status->last_avail_idx =
3902                hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
3903        }
3904    } else {
3905        status->has_shadow_avail_idx = true;
3906        status->has_last_avail_idx = true;
3907        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
3908        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
3909    }
3910
3911    return status;
3912}
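
/*
 * Example QMP exchange (illustrative; the object path is hypothetical and
 * the reply is abbreviated):
 *
 *   -> { "execute": "x-query-virtio-queue-status",
 *        "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                       "queue": 0 } }
 *   <- { "return": { "name": "virtio-net", "queue-index": 0, "inuse": 0,
 *                    "vring-num": 256, ... } }
 */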
3913
3914static strList *qmp_decode_vring_desc_flags(uint16_t flags)
3915{
3916    strList *list = NULL;
3917    strList *node;
3918    int i;
3919
3920    struct {
3921        uint16_t flag;
3922        const char *value;
3923    } map[] = {
3924        { VRING_DESC_F_NEXT, "next" },
3925        { VRING_DESC_F_WRITE, "write" },
3926        { VRING_DESC_F_INDIRECT, "indirect" },
3927        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
3928        { 1 << VRING_PACKED_DESC_F_USED, "used" },
3929        { 0, "" }
3930    };
3931
3932    for (i = 0; map[i].flag; i++) {
3933        if ((map[i].flag & flags) == 0) {
3934            continue;
3935        }
3936        node = g_new0(strList, 1);
3937        node->value = g_strdup(map[i].value);
3938        node->next = list;
3939        list = node;
3940    }
3941
3942    return list;
3943}
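
/*
 * Worked example (illustrative): flags = VRING_DESC_F_NEXT |
 * VRING_DESC_F_WRITE (0x3).  The loop matches "next" first and then
 * prepends "write", so the returned list reads [ "write", "next" ].
 */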
3944
3945VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
3946                                                     uint16_t queue,
3947                                                     bool has_index,
3948                                                     uint16_t index,
3949                                                     Error **errp)
3950{
3951    VirtIODevice *vdev;
3952    VirtQueue *vq;
3953    VirtioQueueElement *element = NULL;
3954
3955    vdev = qmp_find_virtio_device(path);
3956    if (vdev == NULL) {
3957        error_setg(errp, "Path %s is not a VirtIO device", path);
3958        return NULL;
3959    }
3960
3961    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
3962        error_setg(errp, "Invalid virtqueue number %d", queue);
3963        return NULL;
3964    }
3965    vq = &vdev->vq[queue];
3966
3967    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3968        error_setg(errp, "Packed ring not supported");
3969        return NULL;
3970    } else {
3971        unsigned int head, i, max;
3972        VRingMemoryRegionCaches *caches;
3973        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
3974        MemoryRegionCache *desc_cache;
3975        VRingDesc desc;
3976        VirtioRingDescList *list = NULL;
3977        VirtioRingDescList *node;
3978        int rc, ndescs;
3979
3980        RCU_READ_LOCK_GUARD();
3981
3982        max = vq->vring.num;
3983
3984        if (!has_index) {
3985            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
3986        } else {
3987            head = vring_avail_ring(vq, index % vq->vring.num);
3988        }
3989        i = head;
3990
3991        caches = vring_get_region_caches(vq);
3992        if (!caches) {
3993            error_setg(errp, "Region caches not initialized");
3994            return NULL;
3995        }
3996        if (caches->desc.len < max * sizeof(VRingDesc)) {
3997            error_setg(errp, "Cannot map descriptor ring");
3998            return NULL;
3999        }
4000
4001        desc_cache = &caches->desc;
4002        vring_split_desc_read(vdev, &desc, desc_cache, i);
4003        if (desc.flags & VRING_DESC_F_INDIRECT) {
4004            int64_t len;
4005            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4006                                           desc.addr, desc.len, false);
4007            desc_cache = &indirect_desc_cache;
4008            if (len < desc.len) {
4009                error_setg(errp, "Cannot map indirect buffer");
4010                goto done;
4011            }
4012
4013            max = desc.len / sizeof(VRingDesc);
4014            i = 0;
4015            vring_split_desc_read(vdev, &desc, desc_cache, i);
4016        }
4017
4018        element = g_new0(VirtioQueueElement, 1);
4019        element->avail = g_new0(VirtioRingAvail, 1);
4020        element->used = g_new0(VirtioRingUsed, 1);
4021        element->name = g_strdup(vdev->name);
4022        element->index = head;
4023        element->avail->flags = vring_avail_flags(vq);
4024        element->avail->idx = vring_avail_idx(vq);
4025        element->avail->ring = head;
4026        element->used->flags = vring_used_flags(vq);
4027        element->used->idx = vring_used_idx(vq);
4028        ndescs = 0;
4029
4030        do {
4031            /* A buggy driver may produce an infinite loop */
4032            if (ndescs >= max) {
4033                break;
4034            }
4035            node = g_new0(VirtioRingDescList, 1);
4036            node->value = g_new0(VirtioRingDesc, 1);
4037            node->value->addr = desc.addr;
4038            node->value->len = desc.len;
4039            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4040            node->next = list;
4041            list = node;
4042
4043            ndescs++;
4044            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
4045                                                max, &i);
4046        } while (rc == VIRTQUEUE_READ_DESC_MORE);
4047        element->descs = list;
4048done:
4049        address_space_cache_destroy(&indirect_desc_cache);
4050    }
4051
4052    return element;
4053}
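
/*
 * Example QMP exchange (illustrative; the object path is hypothetical, the
 * reply is abbreviated, and only split rings are supported, as checked
 * above):
 *
 *   -> { "execute": "x-query-virtio-queue-element",
 *        "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *                       "queue": 0 } }
 *   <- { "return": { "name": "virtio-net", "index": 5,
 *                    "descs": [ { "addr": ..., "len": ..., "flags": [ "write" ] } ],
 *                    "avail": { ... }, "used": { ... } } }
 */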
4054
4055static const TypeInfo virtio_device_info = {
4056    .name = TYPE_VIRTIO_DEVICE,
4057    .parent = TYPE_DEVICE,
4058    .instance_size = sizeof(VirtIODevice),
4059    .class_init = virtio_device_class_init,
4060    .instance_finalize = virtio_device_instance_finalize,
4061    .abstract = true,
4062    .class_size = sizeof(VirtioDeviceClass),
4063};
4064
4065static void virtio_register_types(void)
4066{
4067    type_register_static(&virtio_device_info);
4068}
4069
4070type_init(virtio_register_types)
4071