1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <stdbool.h>
22#include <stddef.h>
23#include <errno.h>
24#include <string.h>
25#include <assert.h>
26#include <endian.h>
27#include <unistd.h>
28#include <limits.h>
29#include <fcntl.h>
30#include <inttypes.h>
31
32#include <sys/ioctl.h>
33#include <sys/eventfd.h>
34#include <sys/mman.h>
35
36#include "include/atomic.h"
37#include "linux-headers/linux/virtio_ring.h"
38#include "linux-headers/linux/virtio_config.h"
39#include "linux-headers/linux/vduse.h"
40#include "libvduse.h"
41
42#define VDUSE_VQ_ALIGN 4096
43#define MAX_IOVA_REGIONS 256
44
45#define LOG_ALIGNMENT 64
46
47
48#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
49
50
51#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
52
53#ifndef unlikely
54#define unlikely(x) __builtin_expect(!!(x), 0)
55#endif
56
57typedef struct VduseDescStateSplit {
58 uint8_t inflight;
59 uint8_t padding[5];
60 uint16_t next;
61 uint64_t counter;
62} VduseDescStateSplit;
63
64typedef struct VduseVirtqLogInflight {
65 uint64_t features;
66 uint16_t version;
67 uint16_t desc_num;
68 uint16_t last_batch_head;
69 uint16_t used_idx;
70 VduseDescStateSplit desc[];
71} VduseVirtqLogInflight;
72
73typedef struct VduseVirtqLog {
74 VduseVirtqLogInflight inflight;
75} VduseVirtqLog;
76
77typedef struct VduseVirtqInflightDesc {
78 uint16_t index;
79 uint64_t counter;
80} VduseVirtqInflightDesc;
81
82typedef struct VduseRing {
83 unsigned int num;
84 uint64_t desc_addr;
85 uint64_t avail_addr;
86 uint64_t used_addr;
87 struct vring_desc *desc;
88 struct vring_avail *avail;
89 struct vring_used *used;
90} VduseRing;
91
92struct VduseVirtq {
93 VduseRing vring;
94 uint16_t last_avail_idx;
95 uint16_t shadow_avail_idx;
96 uint16_t used_idx;
97 uint16_t signalled_used;
98 bool signalled_used_valid;
99 int index;
100 int inuse;
101 bool ready;
102 int fd;
103 VduseDev *dev;
104 VduseVirtqInflightDesc *resubmit_list;
105 uint16_t resubmit_num;
106 uint64_t counter;
107 VduseVirtqLog *log;
108};
109
110typedef struct VduseIovaRegion {
111 uint64_t iova;
112 uint64_t size;
113 uint64_t mmap_offset;
114 uint64_t mmap_addr;
115} VduseIovaRegion;
116
117struct VduseDev {
118 VduseVirtq *vqs;
119 VduseIovaRegion regions[MAX_IOVA_REGIONS];
120 int num_regions;
121 char *name;
122 uint32_t device_id;
123 uint32_t vendor_id;
124 uint16_t num_queues;
125 uint16_t queue_size;
126 uint64_t features;
127 const VduseOps *ops;
128 int fd;
129 int ctrl_fd;
130 void *priv;
131 void *log;
132};
133
134static inline size_t vduse_vq_log_size(uint16_t queue_size)
135{
136 return ALIGN_UP(sizeof(VduseDescStateSplit) * queue_size +
137 sizeof(VduseVirtqLogInflight), LOG_ALIGNMENT);
138}
139
140static void *vduse_log_get(const char *filename, size_t size)
141{
142 void *ptr = MAP_FAILED;
143 int fd;
144
145 fd = open(filename, O_RDWR | O_CREAT, 0600);
146 if (fd == -1) {
147 return MAP_FAILED;
148 }
149
150 if (ftruncate(fd, size) == -1) {
151 goto out;
152 }
153
154 ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
155
156out:
157 close(fd);
158 return ptr;
159}
160
161static inline bool has_feature(uint64_t features, unsigned int fbit)
162{
163 assert(fbit < 64);
164 return !!(features & (1ULL << fbit));
165}
166
167static inline bool vduse_dev_has_feature(VduseDev *dev, unsigned int fbit)
168{
169 return has_feature(dev->features, fbit);
170}
171
172uint64_t vduse_get_virtio_features(void)
173{
174 return (1ULL << VIRTIO_F_IOMMU_PLATFORM) |
175 (1ULL << VIRTIO_F_VERSION_1) |
176 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
177 (1ULL << VIRTIO_RING_F_EVENT_IDX) |
178 (1ULL << VIRTIO_RING_F_INDIRECT_DESC);
179}
180
181VduseDev *vduse_queue_get_dev(VduseVirtq *vq)
182{
183 return vq->dev;
184}
185
186int vduse_queue_get_fd(VduseVirtq *vq)
187{
188 return vq->fd;
189}
190
191void *vduse_dev_get_priv(VduseDev *dev)
192{
193 return dev->priv;
194}
195
196VduseVirtq *vduse_dev_get_queue(VduseDev *dev, int index)
197{
198 return &dev->vqs[index];
199}
200
201int vduse_dev_get_fd(VduseDev *dev)
202{
203 return dev->fd;
204}
205
206static int vduse_inject_irq(VduseDev *dev, int index)
207{
208 return ioctl(dev->fd, VDUSE_VQ_INJECT_IRQ, &index);
209}
210
211static int inflight_desc_compare(const void *a, const void *b)
212{
213 VduseVirtqInflightDesc *desc0 = (VduseVirtqInflightDesc *)a,
214 *desc1 = (VduseVirtqInflightDesc *)b;
215
216 if (desc1->counter > desc0->counter &&
217 (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
218 return 1;
219 }
220
221 return -1;
222}
223
224static int vduse_queue_check_inflights(VduseVirtq *vq)
225{
226 int i = 0;
227 VduseDev *dev = vq->dev;
228
229 vq->used_idx = le16toh(vq->vring.used->idx);
230 vq->resubmit_num = 0;
231 vq->resubmit_list = NULL;
232 vq->counter = 0;
233
234 if (unlikely(vq->log->inflight.used_idx != vq->used_idx)) {
235 if (vq->log->inflight.last_batch_head > VIRTQUEUE_MAX_SIZE) {
236 return -1;
237 }
238
239 vq->log->inflight.desc[vq->log->inflight.last_batch_head].inflight = 0;
240
241 barrier();
242
243 vq->log->inflight.used_idx = vq->used_idx;
244 }
245
246 for (i = 0; i < vq->log->inflight.desc_num; i++) {
247 if (vq->log->inflight.desc[i].inflight == 1) {
248 vq->inuse++;
249 }
250 }
251
252 vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;
253
254 if (vq->inuse) {
255 vq->resubmit_list = calloc(vq->inuse, sizeof(VduseVirtqInflightDesc));
256 if (!vq->resubmit_list) {
257 return -1;
258 }
259
260 for (i = 0; i < vq->log->inflight.desc_num; i++) {
261 if (vq->log->inflight.desc[i].inflight) {
262 vq->resubmit_list[vq->resubmit_num].index = i;
263 vq->resubmit_list[vq->resubmit_num].counter =
264 vq->log->inflight.desc[i].counter;
265 vq->resubmit_num++;
266 }
267 }
268
269 if (vq->resubmit_num > 1) {
270 qsort(vq->resubmit_list, vq->resubmit_num,
271 sizeof(VduseVirtqInflightDesc), inflight_desc_compare);
272 }
273 vq->counter = vq->resubmit_list[0].counter + 1;
274 }
275
276 vduse_inject_irq(dev, vq->index);
277
278 return 0;
279}
280
281static int vduse_queue_inflight_get(VduseVirtq *vq, int desc_idx)
282{
283 vq->log->inflight.desc[desc_idx].counter = vq->counter++;
284
285 barrier();
286
287 vq->log->inflight.desc[desc_idx].inflight = 1;
288
289 return 0;
290}
291
292static int vduse_queue_inflight_pre_put(VduseVirtq *vq, int desc_idx)
293{
294 vq->log->inflight.last_batch_head = desc_idx;
295
296 return 0;
297}
298
299static int vduse_queue_inflight_post_put(VduseVirtq *vq, int desc_idx)
300{
301 vq->log->inflight.desc[desc_idx].inflight = 0;
302
303 barrier();
304
305 vq->log->inflight.used_idx = vq->used_idx;
306
307 return 0;
308}
309
310static void vduse_iova_remove_region(VduseDev *dev, uint64_t start,
311 uint64_t last)
312{
313 int i;
314
315 if (last == start) {
316 return;
317 }
318
319 for (i = 0; i < MAX_IOVA_REGIONS; i++) {
320 if (!dev->regions[i].mmap_addr) {
321 continue;
322 }
323
324 if (start <= dev->regions[i].iova &&
325 last >= (dev->regions[i].iova + dev->regions[i].size - 1)) {
326 munmap((void *)(uintptr_t)dev->regions[i].mmap_addr,
327 dev->regions[i].mmap_offset + dev->regions[i].size);
328 dev->regions[i].mmap_addr = 0;
329 dev->num_regions--;
330 }
331 }
332}
333
334static int vduse_iova_add_region(VduseDev *dev, int fd,
335 uint64_t offset, uint64_t start,
336 uint64_t last, int prot)
337{
338 int i;
339 uint64_t size = last - start + 1;
340 void *mmap_addr = mmap(0, size + offset, prot, MAP_SHARED, fd, 0);
341
342 if (mmap_addr == MAP_FAILED) {
343 close(fd);
344 return -EINVAL;
345 }
346
347 for (i = 0; i < MAX_IOVA_REGIONS; i++) {
348 if (!dev->regions[i].mmap_addr) {
349 dev->regions[i].mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
350 dev->regions[i].mmap_offset = offset;
351 dev->regions[i].iova = start;
352 dev->regions[i].size = size;
353 dev->num_regions++;
354 break;
355 }
356 }
357 assert(i < MAX_IOVA_REGIONS);
358 close(fd);
359
360 return 0;
361}
362
363static int perm_to_prot(uint8_t perm)
364{
365 int prot = 0;
366
367 switch (perm) {
368 case VDUSE_ACCESS_WO:
369 prot |= PROT_WRITE;
370 break;
371 case VDUSE_ACCESS_RO:
372 prot |= PROT_READ;
373 break;
374 case VDUSE_ACCESS_RW:
375 prot |= PROT_READ | PROT_WRITE;
376 break;
377 default:
378 break;
379 }
380
381 return prot;
382}
383
384static inline void *iova_to_va(VduseDev *dev, uint64_t *plen, uint64_t iova)
385{
386 int i, ret;
387 struct vduse_iotlb_entry entry;
388
389 for (i = 0; i < MAX_IOVA_REGIONS; i++) {
390 VduseIovaRegion *r = &dev->regions[i];
391
392 if (!r->mmap_addr) {
393 continue;
394 }
395
396 if ((iova >= r->iova) && (iova < (r->iova + r->size))) {
397 if ((iova + *plen) > (r->iova + r->size)) {
398 *plen = r->iova + r->size - iova;
399 }
400 return (void *)(uintptr_t)(iova - r->iova +
401 r->mmap_addr + r->mmap_offset);
402 }
403 }
404
405 entry.start = iova;
406 entry.last = iova + 1;
407 ret = ioctl(dev->fd, VDUSE_IOTLB_GET_FD, &entry);
408 if (ret < 0) {
409 return NULL;
410 }
411
412 if (!vduse_iova_add_region(dev, ret, entry.offset, entry.start,
413 entry.last, perm_to_prot(entry.perm))) {
414 return iova_to_va(dev, plen, iova);
415 }
416
417 return NULL;
418}
419
420static inline uint16_t vring_avail_flags(VduseVirtq *vq)
421{
422 return le16toh(vq->vring.avail->flags);
423}
424
425static inline uint16_t vring_avail_idx(VduseVirtq *vq)
426{
427 vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
428
429 return vq->shadow_avail_idx;
430}
431
432static inline uint16_t vring_avail_ring(VduseVirtq *vq, int i)
433{
434 return le16toh(vq->vring.avail->ring[i]);
435}
436
437static inline uint16_t vring_get_used_event(VduseVirtq *vq)
438{
439 return vring_avail_ring(vq, vq->vring.num);
440}
441
442static bool vduse_queue_get_head(VduseVirtq *vq, unsigned int idx,
443 unsigned int *head)
444{
445
446
447
448
449 *head = vring_avail_ring(vq, idx % vq->vring.num);
450
451
452 if (*head >= vq->vring.num) {
453 fprintf(stderr, "Guest says index %u is available\n", *head);
454 return false;
455 }
456
457 return true;
458}
459
460static int
461vduse_queue_read_indirect_desc(VduseDev *dev, struct vring_desc *desc,
462 uint64_t addr, size_t len)
463{
464 struct vring_desc *ori_desc;
465 uint64_t read_len;
466
467 if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
468 return -1;
469 }
470
471 if (len == 0) {
472 return -1;
473 }
474
475 while (len) {
476 read_len = len;
477 ori_desc = iova_to_va(dev, &read_len, addr);
478 if (!ori_desc) {
479 return -1;
480 }
481
482 memcpy(desc, ori_desc, read_len);
483 len -= read_len;
484 addr += read_len;
485 desc += read_len;
486 }
487
488 return 0;
489}
490
491enum {
492 VIRTQUEUE_READ_DESC_ERROR = -1,
493 VIRTQUEUE_READ_DESC_DONE = 0,
494 VIRTQUEUE_READ_DESC_MORE = 1,
495};
496
497static int vduse_queue_read_next_desc(struct vring_desc *desc, int i,
498 unsigned int max, unsigned int *next)
499{
500
501 if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
502 return VIRTQUEUE_READ_DESC_DONE;
503 }
504
505
506 *next = desc[i].next;
507
508 smp_wmb();
509
510 if (*next >= max) {
511 fprintf(stderr, "Desc next is %u\n", *next);
512 return VIRTQUEUE_READ_DESC_ERROR;
513 }
514
515 return VIRTQUEUE_READ_DESC_MORE;
516}
517
518
519
520
521
522static bool vduse_queue_empty(VduseVirtq *vq)
523{
524 if (unlikely(!vq->vring.avail)) {
525 return true;
526 }
527
528 if (vq->shadow_avail_idx != vq->last_avail_idx) {
529 return false;
530 }
531
532 return vring_avail_idx(vq) == vq->last_avail_idx;
533}
534
535static bool vduse_queue_should_notify(VduseVirtq *vq)
536{
537 VduseDev *dev = vq->dev;
538 uint16_t old, new;
539 bool v;
540
541
542 smp_mb();
543
544
545 if (vduse_dev_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
546 !vq->inuse && vduse_queue_empty(vq)) {
547 return true;
548 }
549
550 if (!vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
551 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
552 }
553
554 v = vq->signalled_used_valid;
555 vq->signalled_used_valid = true;
556 old = vq->signalled_used;
557 new = vq->signalled_used = vq->used_idx;
558 return !v || vring_need_event(vring_get_used_event(vq), new, old);
559}
560
561void vduse_queue_notify(VduseVirtq *vq)
562{
563 VduseDev *dev = vq->dev;
564
565 if (unlikely(!vq->vring.avail)) {
566 return;
567 }
568
569 if (!vduse_queue_should_notify(vq)) {
570 return;
571 }
572
573 if (vduse_inject_irq(dev, vq->index) < 0) {
574 fprintf(stderr, "Error inject irq for vq %d: %s\n",
575 vq->index, strerror(errno));
576 }
577}
578
579static inline void vring_set_avail_event(VduseVirtq *vq, uint16_t val)
580{
581 *((uint16_t *)&vq->vring.used->ring[vq->vring.num]) = htole16(val);
582}
583
584static bool vduse_queue_map_single_desc(VduseVirtq *vq, unsigned int *p_num_sg,
585 struct iovec *iov, unsigned int max_num_sg,
586 bool is_write, uint64_t pa, size_t sz)
587{
588 unsigned num_sg = *p_num_sg;
589 VduseDev *dev = vq->dev;
590
591 assert(num_sg <= max_num_sg);
592
593 if (!sz) {
594 fprintf(stderr, "virtio: zero sized buffers are not allowed\n");
595 return false;
596 }
597
598 while (sz) {
599 uint64_t len = sz;
600
601 if (num_sg == max_num_sg) {
602 fprintf(stderr,
603 "virtio: too many descriptors in indirect table\n");
604 return false;
605 }
606
607 iov[num_sg].iov_base = iova_to_va(dev, &len, pa);
608 if (iov[num_sg].iov_base == NULL) {
609 fprintf(stderr, "virtio: invalid address for buffers\n");
610 return false;
611 }
612 iov[num_sg++].iov_len = len;
613 sz -= len;
614 pa += len;
615 }
616
617 *p_num_sg = num_sg;
618 return true;
619}
620
621static void *vduse_queue_alloc_element(size_t sz, unsigned out_num,
622 unsigned in_num)
623{
624 VduseVirtqElement *elem;
625 size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
626 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
627 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
628
629 assert(sz >= sizeof(VduseVirtqElement));
630 elem = malloc(out_sg_end);
631 if (!elem) {
632 return NULL;
633 }
634 elem->out_num = out_num;
635 elem->in_num = in_num;
636 elem->in_sg = (void *)elem + in_sg_ofs;
637 elem->out_sg = (void *)elem + out_sg_ofs;
638 return elem;
639}
640
641static void *vduse_queue_map_desc(VduseVirtq *vq, unsigned int idx, size_t sz)
642{
643 struct vring_desc *desc = vq->vring.desc;
644 VduseDev *dev = vq->dev;
645 uint64_t desc_addr, read_len;
646 unsigned int desc_len;
647 unsigned int max = vq->vring.num;
648 unsigned int i = idx;
649 VduseVirtqElement *elem;
650 struct iovec iov[VIRTQUEUE_MAX_SIZE];
651 struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
652 unsigned int out_num = 0, in_num = 0;
653 int rc;
654
655 if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
656 if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
657 fprintf(stderr, "Invalid size for indirect buffer table\n");
658 return NULL;
659 }
660
661
662 desc_addr = le64toh(desc[i].addr);
663 desc_len = le32toh(desc[i].len);
664 max = desc_len / sizeof(struct vring_desc);
665 read_len = desc_len;
666 desc = iova_to_va(dev, &read_len, desc_addr);
667 if (unlikely(desc && read_len != desc_len)) {
668
669 desc = NULL;
670 if (!vduse_queue_read_indirect_desc(dev, desc_buf,
671 desc_addr,
672 desc_len)) {
673 desc = desc_buf;
674 }
675 }
676 if (!desc) {
677 fprintf(stderr, "Invalid indirect buffer table\n");
678 return NULL;
679 }
680 i = 0;
681 }
682
683
684 do {
685 if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
686 if (!vduse_queue_map_single_desc(vq, &in_num, iov + out_num,
687 VIRTQUEUE_MAX_SIZE - out_num,
688 true, le64toh(desc[i].addr),
689 le32toh(desc[i].len))) {
690 return NULL;
691 }
692 } else {
693 if (in_num) {
694 fprintf(stderr, "Incorrect order for descriptors\n");
695 return NULL;
696 }
697 if (!vduse_queue_map_single_desc(vq, &out_num, iov,
698 VIRTQUEUE_MAX_SIZE, false,
699 le64toh(desc[i].addr),
700 le32toh(desc[i].len))) {
701 return NULL;
702 }
703 }
704
705
706 if ((in_num + out_num) > max) {
707 fprintf(stderr, "Looped descriptor\n");
708 return NULL;
709 }
710 rc = vduse_queue_read_next_desc(desc, i, max, &i);
711 } while (rc == VIRTQUEUE_READ_DESC_MORE);
712
713 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
714 fprintf(stderr, "read descriptor error\n");
715 return NULL;
716 }
717
718
719 elem = vduse_queue_alloc_element(sz, out_num, in_num);
720 if (!elem) {
721 fprintf(stderr, "read descriptor error\n");
722 return NULL;
723 }
724 elem->index = idx;
725 for (i = 0; i < out_num; i++) {
726 elem->out_sg[i] = iov[i];
727 }
728 for (i = 0; i < in_num; i++) {
729 elem->in_sg[i] = iov[out_num + i];
730 }
731
732 return elem;
733}
734
735void *vduse_queue_pop(VduseVirtq *vq, size_t sz)
736{
737 unsigned int head;
738 VduseVirtqElement *elem;
739 VduseDev *dev = vq->dev;
740 int i;
741
742 if (unlikely(!vq->vring.avail)) {
743 return NULL;
744 }
745
746 if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
747 i = (--vq->resubmit_num);
748 elem = vduse_queue_map_desc(vq, vq->resubmit_list[i].index, sz);
749
750 if (!vq->resubmit_num) {
751 free(vq->resubmit_list);
752 vq->resubmit_list = NULL;
753 }
754
755 return elem;
756 }
757
758 if (vduse_queue_empty(vq)) {
759 return NULL;
760 }
761
762 smp_rmb();
763
764 if (vq->inuse >= vq->vring.num) {
765 fprintf(stderr, "Virtqueue size exceeded: %d\n", vq->inuse);
766 return NULL;
767 }
768
769 if (!vduse_queue_get_head(vq, vq->last_avail_idx++, &head)) {
770 return NULL;
771 }
772
773 if (vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
774 vring_set_avail_event(vq, vq->last_avail_idx);
775 }
776
777 elem = vduse_queue_map_desc(vq, head, sz);
778
779 if (!elem) {
780 return NULL;
781 }
782
783 vq->inuse++;
784
785 vduse_queue_inflight_get(vq, head);
786
787 return elem;
788}
789
790static inline void vring_used_write(VduseVirtq *vq,
791 struct vring_used_elem *uelem, int i)
792{
793 struct vring_used *used = vq->vring.used;
794
795 used->ring[i] = *uelem;
796}
797
798static void vduse_queue_fill(VduseVirtq *vq, const VduseVirtqElement *elem,
799 unsigned int len, unsigned int idx)
800{
801 struct vring_used_elem uelem;
802
803 if (unlikely(!vq->vring.used)) {
804 return;
805 }
806
807 idx = (idx + vq->used_idx) % vq->vring.num;
808
809 uelem.id = htole32(elem->index);
810 uelem.len = htole32(len);
811 vring_used_write(vq, &uelem, idx);
812}
813
814static inline void vring_used_idx_set(VduseVirtq *vq, uint16_t val)
815{
816 vq->vring.used->idx = htole16(val);
817 vq->used_idx = val;
818}
819
820static void vduse_queue_flush(VduseVirtq *vq, unsigned int count)
821{
822 uint16_t old, new;
823
824 if (unlikely(!vq->vring.used)) {
825 return;
826 }
827
828
829 smp_wmb();
830
831 old = vq->used_idx;
832 new = old + count;
833 vring_used_idx_set(vq, new);
834 vq->inuse -= count;
835 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
836 vq->signalled_used_valid = false;
837 }
838}
839
840void vduse_queue_push(VduseVirtq *vq, const VduseVirtqElement *elem,
841 unsigned int len)
842{
843 vduse_queue_fill(vq, elem, len, 0);
844 vduse_queue_inflight_pre_put(vq, elem->index);
845 vduse_queue_flush(vq, 1);
846 vduse_queue_inflight_post_put(vq, elem->index);
847}
848
849static int vduse_queue_update_vring(VduseVirtq *vq, uint64_t desc_addr,
850 uint64_t avail_addr, uint64_t used_addr)
851{
852 struct VduseDev *dev = vq->dev;
853 uint64_t len;
854
855 len = sizeof(struct vring_desc);
856 vq->vring.desc = iova_to_va(dev, &len, desc_addr);
857 if (len != sizeof(struct vring_desc)) {
858 return -EINVAL;
859 }
860
861 len = sizeof(struct vring_avail);
862 vq->vring.avail = iova_to_va(dev, &len, avail_addr);
863 if (len != sizeof(struct vring_avail)) {
864 return -EINVAL;
865 }
866
867 len = sizeof(struct vring_used);
868 vq->vring.used = iova_to_va(dev, &len, used_addr);
869 if (len != sizeof(struct vring_used)) {
870 return -EINVAL;
871 }
872
873 if (!vq->vring.desc || !vq->vring.avail || !vq->vring.used) {
874 fprintf(stderr, "Failed to get vq[%d] iova mapping\n", vq->index);
875 return -EINVAL;
876 }
877
878 return 0;
879}
880
881static void vduse_queue_enable(VduseVirtq *vq)
882{
883 struct VduseDev *dev = vq->dev;
884 struct vduse_vq_info vq_info;
885 struct vduse_vq_eventfd vq_eventfd;
886 int fd;
887
888 vq_info.index = vq->index;
889 if (ioctl(dev->fd, VDUSE_VQ_GET_INFO, &vq_info)) {
890 fprintf(stderr, "Failed to get vq[%d] info: %s\n",
891 vq->index, strerror(errno));
892 return;
893 }
894
895 if (!vq_info.ready) {
896 return;
897 }
898
899 vq->vring.num = vq_info.num;
900 vq->vring.desc_addr = vq_info.desc_addr;
901 vq->vring.avail_addr = vq_info.driver_addr;
902 vq->vring.used_addr = vq_info.device_addr;
903
904 if (vduse_queue_update_vring(vq, vq_info.desc_addr,
905 vq_info.driver_addr, vq_info.device_addr)) {
906 fprintf(stderr, "Failed to update vring for vq[%d]\n", vq->index);
907 return;
908 }
909
910 fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
911 if (fd < 0) {
912 fprintf(stderr, "Failed to init eventfd for vq[%d]\n", vq->index);
913 return;
914 }
915
916 vq_eventfd.index = vq->index;
917 vq_eventfd.fd = fd;
918 if (ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &vq_eventfd)) {
919 fprintf(stderr, "Failed to setup kick fd for vq[%d]\n", vq->index);
920 close(fd);
921 return;
922 }
923
924 vq->fd = fd;
925 vq->signalled_used_valid = false;
926 vq->ready = true;
927
928 if (vduse_queue_check_inflights(vq)) {
929 fprintf(stderr, "Failed to check inflights for vq[%d]\n", vq->index);
930 close(fd);
931 return;
932 }
933
934 dev->ops->enable_queue(dev, vq);
935}
936
937static void vduse_queue_disable(VduseVirtq *vq)
938{
939 struct VduseDev *dev = vq->dev;
940 struct vduse_vq_eventfd eventfd;
941
942 if (!vq->ready) {
943 return;
944 }
945
946 dev->ops->disable_queue(dev, vq);
947
948 eventfd.index = vq->index;
949 eventfd.fd = VDUSE_EVENTFD_DEASSIGN;
950 ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &eventfd);
951 close(vq->fd);
952
953 assert(vq->inuse == 0);
954
955 vq->vring.num = 0;
956 vq->vring.desc_addr = 0;
957 vq->vring.avail_addr = 0;
958 vq->vring.used_addr = 0;
959 vq->vring.desc = 0;
960 vq->vring.avail = 0;
961 vq->vring.used = 0;
962 vq->ready = false;
963 vq->fd = -1;
964}
965
966static void vduse_dev_start_dataplane(VduseDev *dev)
967{
968 int i;
969
970 if (ioctl(dev->fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
971 fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
972 return;
973 }
974 assert(vduse_dev_has_feature(dev, VIRTIO_F_VERSION_1));
975
976 for (i = 0; i < dev->num_queues; i++) {
977 vduse_queue_enable(&dev->vqs[i]);
978 }
979}
980
981static void vduse_dev_stop_dataplane(VduseDev *dev)
982{
983 size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
984 int i;
985
986 for (i = 0; i < dev->num_queues; i++) {
987 vduse_queue_disable(&dev->vqs[i]);
988 }
989 if (dev->log) {
990 memset(dev->log, 0, log_size);
991 }
992 dev->features = 0;
993 vduse_iova_remove_region(dev, 0, ULONG_MAX);
994}
995
996int vduse_dev_handler(VduseDev *dev)
997{
998 struct vduse_dev_request req;
999 struct vduse_dev_response resp = { 0 };
1000 VduseVirtq *vq;
1001 int i, ret;
1002
1003 ret = read(dev->fd, &req, sizeof(req));
1004 if (ret != sizeof(req)) {
1005 fprintf(stderr, "Read request error [%d]: %s\n",
1006 ret, strerror(errno));
1007 return -errno;
1008 }
1009 resp.request_id = req.request_id;
1010
1011 switch (req.type) {
1012 case VDUSE_GET_VQ_STATE:
1013 vq = &dev->vqs[req.vq_state.index];
1014 resp.vq_state.split.avail_index = vq->last_avail_idx;
1015 resp.result = VDUSE_REQ_RESULT_OK;
1016 break;
1017 case VDUSE_SET_STATUS:
1018 if (req.s.status & VIRTIO_CONFIG_S_DRIVER_OK) {
1019 vduse_dev_start_dataplane(dev);
1020 } else if (req.s.status == 0) {
1021 vduse_dev_stop_dataplane(dev);
1022 }
1023 resp.result = VDUSE_REQ_RESULT_OK;
1024 break;
1025 case VDUSE_UPDATE_IOTLB:
1026
1027 vduse_iova_remove_region(dev, req.iova.start, req.iova.last);
1028 for (i = 0; i < dev->num_queues; i++) {
1029 VduseVirtq *vq = &dev->vqs[i];
1030 if (vq->ready) {
1031 if (vduse_queue_update_vring(vq, vq->vring.desc_addr,
1032 vq->vring.avail_addr,
1033 vq->vring.used_addr)) {
1034 fprintf(stderr, "Failed to update vring for vq[%d]\n",
1035 vq->index);
1036 }
1037 }
1038 }
1039 resp.result = VDUSE_REQ_RESULT_OK;
1040 break;
1041 default:
1042 resp.result = VDUSE_REQ_RESULT_FAILED;
1043 break;
1044 }
1045
1046 ret = write(dev->fd, &resp, sizeof(resp));
1047 if (ret != sizeof(resp)) {
1048 fprintf(stderr, "Write request %d error [%d]: %s\n",
1049 req.type, ret, strerror(errno));
1050 return -errno;
1051 }
1052 return 0;
1053}
1054
1055int vduse_dev_update_config(VduseDev *dev, uint32_t size,
1056 uint32_t offset, char *buffer)
1057{
1058 int ret;
1059 struct vduse_config_data *data;
1060
1061 data = malloc(offsetof(struct vduse_config_data, buffer) + size);
1062 if (!data) {
1063 return -ENOMEM;
1064 }
1065
1066 data->offset = offset;
1067 data->length = size;
1068 memcpy(data->buffer, buffer, size);
1069
1070 ret = ioctl(dev->fd, VDUSE_DEV_SET_CONFIG, data);
1071 free(data);
1072
1073 if (ret) {
1074 return -errno;
1075 }
1076
1077 if (ioctl(dev->fd, VDUSE_DEV_INJECT_CONFIG_IRQ)) {
1078 return -errno;
1079 }
1080
1081 return 0;
1082}
1083
1084int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size)
1085{
1086 VduseVirtq *vq = &dev->vqs[index];
1087 struct vduse_vq_config vq_config = { 0 };
1088
1089 if (max_size > VIRTQUEUE_MAX_SIZE) {
1090 return -EINVAL;
1091 }
1092
1093 vq_config.index = vq->index;
1094 vq_config.max_size = max_size;
1095
1096 if (ioctl(dev->fd, VDUSE_VQ_SETUP, &vq_config)) {
1097 return -errno;
1098 }
1099
1100 vduse_queue_enable(vq);
1101
1102 return 0;
1103}
1104
1105int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename)
1106{
1107
1108 size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
1109 void *log;
1110 int i;
1111
1112 dev->log = log = vduse_log_get(filename, log_size);
1113 if (log == MAP_FAILED) {
1114 fprintf(stderr, "Failed to get vduse log\n");
1115 return -EINVAL;
1116 }
1117
1118 for (i = 0; i < dev->num_queues; i++) {
1119 dev->vqs[i].log = log;
1120 dev->vqs[i].log->inflight.desc_num = VIRTQUEUE_MAX_SIZE;
1121 log = (void *)((char *)log + vduse_vq_log_size(VIRTQUEUE_MAX_SIZE));
1122 }
1123
1124 return 0;
1125}
1126
1127static int vduse_dev_init_vqs(VduseDev *dev, uint16_t num_queues)
1128{
1129 VduseVirtq *vqs;
1130 int i;
1131
1132 vqs = calloc(sizeof(VduseVirtq), num_queues);
1133 if (!vqs) {
1134 return -ENOMEM;
1135 }
1136
1137 for (i = 0; i < num_queues; i++) {
1138 vqs[i].index = i;
1139 vqs[i].dev = dev;
1140 vqs[i].fd = -1;
1141 }
1142 dev->vqs = vqs;
1143
1144 return 0;
1145}
1146
1147static int vduse_dev_init(VduseDev *dev, const char *name,
1148 uint16_t num_queues, const VduseOps *ops,
1149 void *priv)
1150{
1151 char *dev_path, *dev_name;
1152 int ret, fd;
1153
1154 dev_path = malloc(strlen(name) + strlen("/dev/vduse/") + 1);
1155 if (!dev_path) {
1156 return -ENOMEM;
1157 }
1158 sprintf(dev_path, "/dev/vduse/%s", name);
1159
1160 fd = open(dev_path, O_RDWR);
1161 free(dev_path);
1162 if (fd < 0) {
1163 fprintf(stderr, "Failed to open vduse dev %s: %s\n",
1164 name, strerror(errno));
1165 return -errno;
1166 }
1167
1168 if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
1169 fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
1170 close(fd);
1171 return -errno;
1172 }
1173
1174 dev_name = strdup(name);
1175 if (!dev_name) {
1176 close(fd);
1177 return -ENOMEM;
1178 }
1179
1180 ret = vduse_dev_init_vqs(dev, num_queues);
1181 if (ret) {
1182 free(dev_name);
1183 close(fd);
1184 return ret;
1185 }
1186
1187 dev->name = dev_name;
1188 dev->num_queues = num_queues;
1189 dev->fd = fd;
1190 dev->ops = ops;
1191 dev->priv = priv;
1192
1193 return 0;
1194}
1195
1196static inline bool vduse_name_is_invalid(const char *name)
1197{
1198 return strlen(name) >= VDUSE_NAME_MAX || strstr(name, "..");
1199}
1200
1201VduseDev *vduse_dev_create_by_fd(int fd, uint16_t num_queues,
1202 const VduseOps *ops, void *priv)
1203{
1204 VduseDev *dev;
1205 int ret;
1206
1207 if (!ops || !ops->enable_queue || !ops->disable_queue) {
1208 fprintf(stderr, "Invalid parameter for vduse\n");
1209 return NULL;
1210 }
1211
1212 dev = calloc(sizeof(VduseDev), 1);
1213 if (!dev) {
1214 fprintf(stderr, "Failed to allocate vduse device\n");
1215 return NULL;
1216 }
1217
1218 if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
1219 fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
1220 free(dev);
1221 return NULL;
1222 }
1223
1224 ret = vduse_dev_init_vqs(dev, num_queues);
1225 if (ret) {
1226 fprintf(stderr, "Failed to init vqs\n");
1227 free(dev);
1228 return NULL;
1229 }
1230
1231 dev->num_queues = num_queues;
1232 dev->fd = fd;
1233 dev->ops = ops;
1234 dev->priv = priv;
1235
1236 return dev;
1237}
1238
1239VduseDev *vduse_dev_create_by_name(const char *name, uint16_t num_queues,
1240 const VduseOps *ops, void *priv)
1241{
1242 VduseDev *dev;
1243 int ret;
1244
1245 if (!name || vduse_name_is_invalid(name) || !ops ||
1246 !ops->enable_queue || !ops->disable_queue) {
1247 fprintf(stderr, "Invalid parameter for vduse\n");
1248 return NULL;
1249 }
1250
1251 dev = calloc(sizeof(VduseDev), 1);
1252 if (!dev) {
1253 fprintf(stderr, "Failed to allocate vduse device\n");
1254 return NULL;
1255 }
1256
1257 ret = vduse_dev_init(dev, name, num_queues, ops, priv);
1258 if (ret < 0) {
1259 fprintf(stderr, "Failed to init vduse device %s: %s\n",
1260 name, strerror(-ret));
1261 free(dev);
1262 return NULL;
1263 }
1264
1265 return dev;
1266}
1267
1268VduseDev *vduse_dev_create(const char *name, uint32_t device_id,
1269 uint32_t vendor_id, uint64_t features,
1270 uint16_t num_queues, uint32_t config_size,
1271 char *config, const VduseOps *ops, void *priv)
1272{
1273 VduseDev *dev;
1274 int ret, ctrl_fd;
1275 uint64_t version;
1276 struct vduse_dev_config *dev_config;
1277 size_t size = offsetof(struct vduse_dev_config, config);
1278
1279 if (!name || vduse_name_is_invalid(name) ||
1280 !has_feature(features, VIRTIO_F_VERSION_1) || !config ||
1281 !config_size || !ops || !ops->enable_queue || !ops->disable_queue) {
1282 fprintf(stderr, "Invalid parameter for vduse\n");
1283 return NULL;
1284 }
1285
1286 dev = calloc(sizeof(VduseDev), 1);
1287 if (!dev) {
1288 fprintf(stderr, "Failed to allocate vduse device\n");
1289 return NULL;
1290 }
1291
1292 ctrl_fd = open("/dev/vduse/control", O_RDWR);
1293 if (ctrl_fd < 0) {
1294 fprintf(stderr, "Failed to open /dev/vduse/control: %s\n",
1295 strerror(errno));
1296 goto err_ctrl;
1297 }
1298
1299 version = VDUSE_API_VERSION;
1300 if (ioctl(ctrl_fd, VDUSE_SET_API_VERSION, &version)) {
1301 fprintf(stderr, "Failed to set api version %" PRIu64 ": %s\n",
1302 version, strerror(errno));
1303 goto err_dev;
1304 }
1305
1306 dev_config = calloc(size + config_size, 1);
1307 if (!dev_config) {
1308 fprintf(stderr, "Failed to allocate config space\n");
1309 goto err_dev;
1310 }
1311
1312 strncpy(dev_config->name, name, VDUSE_NAME_MAX);
1313 dev_config->name[VDUSE_NAME_MAX - 1] = '\0';
1314 dev_config->device_id = device_id;
1315 dev_config->vendor_id = vendor_id;
1316 dev_config->features = features;
1317 dev_config->vq_num = num_queues;
1318 dev_config->vq_align = VDUSE_VQ_ALIGN;
1319 dev_config->config_size = config_size;
1320 memcpy(dev_config->config, config, config_size);
1321
1322 ret = ioctl(ctrl_fd, VDUSE_CREATE_DEV, dev_config);
1323 free(dev_config);
1324 if (ret && errno != EEXIST) {
1325 fprintf(stderr, "Failed to create vduse device %s: %s\n",
1326 name, strerror(errno));
1327 goto err_dev;
1328 }
1329 dev->ctrl_fd = ctrl_fd;
1330
1331 ret = vduse_dev_init(dev, name, num_queues, ops, priv);
1332 if (ret < 0) {
1333 fprintf(stderr, "Failed to init vduse device %s: %s\n",
1334 name, strerror(-ret));
1335 goto err;
1336 }
1337
1338 return dev;
1339err:
1340 ioctl(ctrl_fd, VDUSE_DESTROY_DEV, name);
1341err_dev:
1342 close(ctrl_fd);
1343err_ctrl:
1344 free(dev);
1345
1346 return NULL;
1347}
1348
1349int vduse_dev_destroy(VduseDev *dev)
1350{
1351 size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
1352 int i, ret = 0;
1353
1354 if (dev->log) {
1355 munmap(dev->log, log_size);
1356 }
1357 for (i = 0; i < dev->num_queues; i++) {
1358 free(dev->vqs[i].resubmit_list);
1359 }
1360 free(dev->vqs);
1361 if (dev->fd >= 0) {
1362 close(dev->fd);
1363 dev->fd = -1;
1364 }
1365 if (dev->ctrl_fd >= 0) {
1366 if (ioctl(dev->ctrl_fd, VDUSE_DESTROY_DEV, dev->name)) {
1367 ret = -errno;
1368 }
1369 close(dev->ctrl_fd);
1370 dev->ctrl_fd = -1;
1371 }
1372 free(dev->name);
1373 free(dev);
1374
1375 return ret;
1376}
1377