qemu/hw/virtio/virtio.c
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;   /* guest-physical address of the buffer */
    uint32_t len;    /* length of the buffer in bytes */
    uint16_t flags;  /* VRING_DESC_F_NEXT / _WRITE / _INDIRECT */
    uint16_t next;   /* index of the chained descriptor, if F_NEXT is set */
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid. */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}
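
/*
 * Worked example (illustrative, not part of the original source): for a
 * queue with num = 256, the descriptor table takes 256 * 16 = 4096 bytes
 * starting at pa, the avail ring starts at pa + 4096 and spans
 * offsetof(VRingAvail, ring[256]) = 4 + 2 * 256 = 516 bytes (plus the
 * used_event word when event_idx is negotiated), and the used ring is
 * rounded up to the next VIRTIO_PCI_VRING_ALIGN boundary, landing at
 * pa + 8192.
 */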

static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline uint16_t vring_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
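
/*
 * Illustrative usage (not from the original source): a device that wants
 * to process in polling bursts can suppress guest kicks while draining:
 *
 *     VirtQueueElement elem;
 *
 *     virtio_queue_set_notification(vq, 0);
 *     while (virtqueue_pop(vq, &elem)) {
 *         ... process elem ...
 *     }
 *     virtio_queue_set_notification(vq, 1);
 *     ... then re-check virtio_queue_empty() to close the race with a
 *     buffer added before notifications were re-enabled ...
 */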

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
    }

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Fill in the next entry of the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
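
/*
 * Illustrative completion path (not from the original source): after a
 * device has finished with an element popped earlier, it returns it to
 * the guest and raises the interrupt if needed:
 *
 *     virtqueue_push(vq, &elem, bytes_written);
 *     virtio_notify(vdev, vq);
 *
 * Devices that complete several elements at once can instead call
 * virtqueue_fill() once per element and a single virtqueue_flush()
 * for the whole batch.
 */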

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(hwaddr desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT)) {
        return max;
    }

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table; read the table
             * address from the current descriptor before resetting i */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(desc_pa, i);
            } else {
                out_total += vring_desc_len(desc_pa, i);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}
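
/*
 * Illustrative check (not from the original source): a device can ask
 * whether the guest has queued enough buffer space before popping
 * anything, e.g. a network device testing for room for a received
 * packet of pkt_len writable bytes:
 *
 *     if (!virtqueue_avail_bytes(vq, pkt_len, 0)) {
 *         return;   ... wait for the guest to add more buffers ...
 *     }
 */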

void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
    size_t num_sg, int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx)) {
        return 0;
    }

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
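
/*
 * Illustrative consume loop (not from the original source): a typical
 * handle_output callback drains the avail ring like this:
 *
 *     VirtQueueElement elem;
 *
 *     while (virtqueue_pop(vq, &elem)) {
 *         ... read elem.out_sg, write elem.in_sg ...
 *         virtqueue_push(vq, &elem, bytes_written);
 *     }
 *     virtio_notify(vdev, vq);
 */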

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX) {
        vdev->vq[n].vector = vector;
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}
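
/*
 * Illustrative registration (not from the original source; the handler
 * name is hypothetical): a device's init code typically does
 *
 *     static void my_dev_handle_tx(VirtIODevice *vdev, VirtQueue *vq);
 *     ...
 *     s->tx_vq = virtio_add_queue(vdev, 256, my_dev_handle_tx);
 *
 * The queue size must not exceed VIRTQUEUE_MAX_SIZE, or the call aborts.
 */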

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new, should
 * we trigger an event? */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}
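
/*
 * Worked example (illustrative, not part of the original source): with
 * old = 10 and new = 12, the used index just advanced past entries 10
 * and 11. If event = 10, (uint16_t)(12 - 10 - 1) = 1 < (uint16_t)(12 - 10)
 * = 2, so we notify. If event = 8, the event index was already crossed
 * before this batch: 3 < 2 is false, so the notification is skipped.
 * The uint16_t casts keep the comparison correct across index wraparound.
 */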

static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when the queue is empty, if VIRTIO_F_NOTIFY_ON_EMPTY
     * was acknowledged by the guest. */
    if ((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
        !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }
}

int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t supported_features = vbusk->get_features(qbus->parent);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret) {
            return ret;
        }
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = k->get_features(qbus->parent);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret) {
                return ret;
            }
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    int i;
    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
}
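
/*
 * Illustrative initialization (not from the original source; the device
 * name, ID macro and handler are hypothetical): a concrete device's init
 * callback typically does
 *
 *     virtio_init(vdev, "my-dev", VIRTIO_ID_MYDEV, sizeof(struct my_config));
 *     s->vq = virtio_add_queue(vdev, 128, my_handle_output);
 */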

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    /* avail ring entries are 16-bit descriptor indexes */
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
            virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    if (vdev->bus_name) {
        g_free(vdev->bus_name);
        vdev->bus_name = NULL;
    }

    if (bus_name) {
        vdev->bus_name = g_strdup(bus_name);
    }
}

static int virtio_device_init(DeviceState *qdev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
    assert(k->init != NULL);
    if (k->init(vdev) < 0) {
        return -1;
    }
    virtio_bus_plug_device(vdev);
    return 0;
}

static int virtio_device_exit(DeviceState *qdev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);

    if (vdev->bus_name) {
        g_free(vdev->bus_name);
        vdev->bus_name = NULL;
    }
    return 0;
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);
    dc->init = virtio_device_init;
    dc->exit = virtio_device_exit;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)