1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <stdlib.h>
18#include <stdio.h>
19#include <unistd.h>
20#include <stdarg.h>
21#include <errno.h>
22#include <string.h>
23#include <assert.h>
24#include <inttypes.h>
25#include <sys/types.h>
26#include <sys/socket.h>
27#include <sys/eventfd.h>
28#include <sys/mman.h>
29#include <endian.h>
30
31#if defined(__linux__)
32#include <sys/syscall.h>
33#include <fcntl.h>
34#include <sys/ioctl.h>
35#include <linux/vhost.h>
36
37#ifdef __NR_userfaultfd
38#include <linux/userfaultfd.h>
39#endif
40
41#endif
42
43#include "include/atomic.h"
44
45#include "libvhost-user.h"
46
47
48#ifndef MIN
49#define MIN(x, y) ({ \
50 typeof(x) _min1 = (x); \
51 typeof(y) _min2 = (y); \
52 (void) (&_min1 == &_min2); \
53 _min1 < _min2 ? _min1 : _min2; })
54#endif
55
56
57#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
58
59
60#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
61
62#ifndef unlikely
63#define unlikely(x) __builtin_expect(!!(x), 0)
64#endif
65
66
67#define INFLIGHT_ALIGNMENT 64
68
69
70#define INFLIGHT_VERSION 1
71
72
73#define VHOST_USER_VERSION 1
74#define LIBVHOST_USER_DEBUG 0
75
76#define DPRINT(...) \
77 do { \
78 if (LIBVHOST_USER_DEBUG) { \
79 fprintf(stderr, __VA_ARGS__); \
80 } \
81 } while (0)
82
83static inline
84bool has_feature(uint64_t features, unsigned int fbit)
85{
86 assert(fbit < 64);
87 return !!(features & (1ULL << fbit));
88}
89
90static inline
91bool vu_has_feature(VuDev *dev,
92 unsigned int fbit)
93{
94 return has_feature(dev->features, fbit);
95}
96
97static inline bool vu_has_protocol_feature(VuDev *dev, unsigned int fbit)
98{
99 return has_feature(dev->protocol_features, fbit);
100}
101
102static const char *
103vu_request_to_string(unsigned int req)
104{
105#define REQ(req) [req] = #req
106 static const char *vu_request_str[] = {
107 REQ(VHOST_USER_NONE),
108 REQ(VHOST_USER_GET_FEATURES),
109 REQ(VHOST_USER_SET_FEATURES),
110 REQ(VHOST_USER_SET_OWNER),
111 REQ(VHOST_USER_RESET_OWNER),
112 REQ(VHOST_USER_SET_MEM_TABLE),
113 REQ(VHOST_USER_SET_LOG_BASE),
114 REQ(VHOST_USER_SET_LOG_FD),
115 REQ(VHOST_USER_SET_VRING_NUM),
116 REQ(VHOST_USER_SET_VRING_ADDR),
117 REQ(VHOST_USER_SET_VRING_BASE),
118 REQ(VHOST_USER_GET_VRING_BASE),
119 REQ(VHOST_USER_SET_VRING_KICK),
120 REQ(VHOST_USER_SET_VRING_CALL),
121 REQ(VHOST_USER_SET_VRING_ERR),
122 REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
123 REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
124 REQ(VHOST_USER_GET_QUEUE_NUM),
125 REQ(VHOST_USER_SET_VRING_ENABLE),
126 REQ(VHOST_USER_SEND_RARP),
127 REQ(VHOST_USER_NET_SET_MTU),
128 REQ(VHOST_USER_SET_SLAVE_REQ_FD),
129 REQ(VHOST_USER_IOTLB_MSG),
130 REQ(VHOST_USER_SET_VRING_ENDIAN),
131 REQ(VHOST_USER_GET_CONFIG),
132 REQ(VHOST_USER_SET_CONFIG),
133 REQ(VHOST_USER_POSTCOPY_ADVISE),
134 REQ(VHOST_USER_POSTCOPY_LISTEN),
135 REQ(VHOST_USER_POSTCOPY_END),
136 REQ(VHOST_USER_GET_INFLIGHT_FD),
137 REQ(VHOST_USER_SET_INFLIGHT_FD),
138 REQ(VHOST_USER_GPU_SET_SOCKET),
139 REQ(VHOST_USER_VRING_KICK),
140 REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
141 REQ(VHOST_USER_ADD_MEM_REG),
142 REQ(VHOST_USER_REM_MEM_REG),
143 REQ(VHOST_USER_MAX),
144 };
145#undef REQ
146
147 if (req < VHOST_USER_MAX) {
148 return vu_request_str[req];
149 } else {
150 return "unknown";
151 }
152}
153
154static void
155vu_panic(VuDev *dev, const char *msg, ...)
156{
157 char *buf = NULL;
158 va_list ap;
159
160 va_start(ap, msg);
161 if (vasprintf(&buf, msg, ap) < 0) {
162 buf = NULL;
163 }
164 va_end(ap);
165
166 dev->broken = true;
167 dev->panic(dev, buf);
168 free(buf);
169
170
171
172
173
174}
175
176
177void *
178vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr)
179{
180 int i;
181
182 if (*plen == 0) {
183 return NULL;
184 }
185
186
187 for (i = 0; i < dev->nregions; i++) {
188 VuDevRegion *r = &dev->regions[i];
189
190 if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
191 if ((guest_addr + *plen) > (r->gpa + r->size)) {
192 *plen = r->gpa + r->size - guest_addr;
193 }
194 return (void *)(uintptr_t)
195 guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
196 }
197 }
198
199 return NULL;
200}
201
202
203static void *
204qva_to_va(VuDev *dev, uint64_t qemu_addr)
205{
206 int i;
207
208
209 for (i = 0; i < dev->nregions; i++) {
210 VuDevRegion *r = &dev->regions[i];
211
212 if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
213 return (void *)(uintptr_t)
214 qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
215 }
216 }
217
218 return NULL;
219}
220
221static void
222vmsg_close_fds(VhostUserMsg *vmsg)
223{
224 int i;
225
226 for (i = 0; i < vmsg->fd_num; i++) {
227 close(vmsg->fds[i]);
228 }
229}
230
231
232static void vmsg_set_reply_u64(VhostUserMsg *vmsg, uint64_t val)
233{
234 vmsg->flags = 0;
235 vmsg->size = sizeof(vmsg->payload.u64);
236 vmsg->payload.u64 = val;
237 vmsg->fd_num = 0;
238}
239
240
241static bool
242have_userfault(void)
243{
244#if defined(__linux__) && defined(__NR_userfaultfd) &&\
245 defined(UFFD_FEATURE_MISSING_SHMEM) &&\
246 defined(UFFD_FEATURE_MISSING_HUGETLBFS)
247
248 int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
249 struct uffdio_api api_struct;
250 if (ufd < 0) {
251 return false;
252 }
253
254 api_struct.api = UFFD_API;
255 api_struct.features = UFFD_FEATURE_MISSING_SHMEM |
256 UFFD_FEATURE_MISSING_HUGETLBFS;
257 if (ioctl(ufd, UFFDIO_API, &api_struct)) {
258 close(ufd);
259 return false;
260 }
261 close(ufd);
262 return true;
263
264#else
265 return false;
266#endif
267}
268
269static bool
270vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
271{
272 char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
273 struct iovec iov = {
274 .iov_base = (char *)vmsg,
275 .iov_len = VHOST_USER_HDR_SIZE,
276 };
277 struct msghdr msg = {
278 .msg_iov = &iov,
279 .msg_iovlen = 1,
280 .msg_control = control,
281 .msg_controllen = sizeof(control),
282 };
283 size_t fd_size;
284 struct cmsghdr *cmsg;
285 int rc;
286
287 do {
288 rc = recvmsg(conn_fd, &msg, 0);
289 } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
290
291 if (rc < 0) {
292 vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
293 return false;
294 }
295
296 vmsg->fd_num = 0;
297 for (cmsg = CMSG_FIRSTHDR(&msg);
298 cmsg != NULL;
299 cmsg = CMSG_NXTHDR(&msg, cmsg))
300 {
301 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
302 fd_size = cmsg->cmsg_len - CMSG_LEN(0);
303 vmsg->fd_num = fd_size / sizeof(int);
304 memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
305 break;
306 }
307 }
308
309 if (vmsg->size > sizeof(vmsg->payload)) {
310 vu_panic(dev,
311 "Error: too big message request: %d, size: vmsg->size: %u, "
312 "while sizeof(vmsg->payload) = %zu\n",
313 vmsg->request, vmsg->size, sizeof(vmsg->payload));
314 goto fail;
315 }
316
317 if (vmsg->size) {
318 do {
319 rc = read(conn_fd, &vmsg->payload, vmsg->size);
320 } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
321
322 if (rc <= 0) {
323 vu_panic(dev, "Error while reading: %s", strerror(errno));
324 goto fail;
325 }
326
327 assert(rc == vmsg->size);
328 }
329
330 return true;
331
332fail:
333 vmsg_close_fds(vmsg);
334
335 return false;
336}
337
338static bool
339vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
340{
341 int rc;
342 uint8_t *p = (uint8_t *)vmsg;
343 char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
344 struct iovec iov = {
345 .iov_base = (char *)vmsg,
346 .iov_len = VHOST_USER_HDR_SIZE,
347 };
348 struct msghdr msg = {
349 .msg_iov = &iov,
350 .msg_iovlen = 1,
351 .msg_control = control,
352 };
353 struct cmsghdr *cmsg;
354
355 memset(control, 0, sizeof(control));
356 assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
357 if (vmsg->fd_num > 0) {
358 size_t fdsize = vmsg->fd_num * sizeof(int);
359 msg.msg_controllen = CMSG_SPACE(fdsize);
360 cmsg = CMSG_FIRSTHDR(&msg);
361 cmsg->cmsg_len = CMSG_LEN(fdsize);
362 cmsg->cmsg_level = SOL_SOCKET;
363 cmsg->cmsg_type = SCM_RIGHTS;
364 memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
365 } else {
366 msg.msg_controllen = 0;
367 }
368
369 do {
370 rc = sendmsg(conn_fd, &msg, 0);
371 } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
372
373 if (vmsg->size) {
374 do {
375 if (vmsg->data) {
376 rc = write(conn_fd, vmsg->data, vmsg->size);
377 } else {
378 rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
379 }
380 } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
381 }
382
383 if (rc <= 0) {
384 vu_panic(dev, "Error while writing: %s", strerror(errno));
385 return false;
386 }
387
388 return true;
389}
390
391static bool
392vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
393{
394
395 vmsg->flags &= ~VHOST_USER_VERSION_MASK;
396 vmsg->flags |= VHOST_USER_VERSION;
397 vmsg->flags |= VHOST_USER_REPLY_MASK;
398
399 return vu_message_write(dev, conn_fd, vmsg);
400}
401
402
403
404
405
406
407static bool
408vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
409{
410 VhostUserMsg msg_reply;
411 bool result = false;
412
413 if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
414 result = true;
415 goto out;
416 }
417
418 if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
419 goto out;
420 }
421
422 if (msg_reply.request != vmsg->request) {
423 DPRINT("Received unexpected msg type. Expected %d received %d",
424 vmsg->request, msg_reply.request);
425 goto out;
426 }
427
428 result = msg_reply.payload.u64 == 0;
429
430out:
431 pthread_mutex_unlock(&dev->slave_mutex);
432 return result;
433}
434
435
436static void
437vu_log_kick(VuDev *dev)
438{
439 if (dev->log_call_fd != -1) {
440 DPRINT("Kicking the QEMU's log...\n");
441 if (eventfd_write(dev->log_call_fd, 1) < 0) {
442 vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
443 }
444 }
445}
446
447static void
448vu_log_page(uint8_t *log_table, uint64_t page)
449{
450 DPRINT("Logged dirty guest page: %"PRId64"\n", page);
451 qatomic_or(&log_table[page / 8], 1 << (page % 8));
452}
453
454static void
455vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
456{
457 uint64_t page;
458
459 if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
460 !dev->log_table || !length) {
461 return;
462 }
463
464 assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
465
466 page = address / VHOST_LOG_PAGE;
467 while (page * VHOST_LOG_PAGE < address + length) {
468 vu_log_page(dev->log_table, page);
469 page += 1;
470 }
471
472 vu_log_kick(dev);
473}
474
475static void
476vu_kick_cb(VuDev *dev, int condition, void *data)
477{
478 int index = (intptr_t)data;
479 VuVirtq *vq = &dev->vq[index];
480 int sock = vq->kick_fd;
481 eventfd_t kick_data;
482 ssize_t rc;
483
484 rc = eventfd_read(sock, &kick_data);
485 if (rc == -1) {
486 vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
487 dev->remove_watch(dev, dev->vq[index].kick_fd);
488 } else {
489 DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
490 kick_data, vq->handler, index);
491 if (vq->handler) {
492 vq->handler(dev, index);
493 }
494 }
495}
496
497static bool
498vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
499{
500 vmsg->payload.u64 =
501
502
503
504
505 1ULL << VIRTIO_F_NOTIFY_ON_EMPTY |
506 1ULL << VIRTIO_RING_F_INDIRECT_DESC |
507 1ULL << VIRTIO_RING_F_EVENT_IDX |
508 1ULL << VIRTIO_F_VERSION_1 |
509
510
511 1ULL << VHOST_F_LOG_ALL |
512 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
513
514 if (dev->iface->get_features) {
515 vmsg->payload.u64 |= dev->iface->get_features(dev);
516 }
517
518 vmsg->size = sizeof(vmsg->payload.u64);
519 vmsg->fd_num = 0;
520
521 DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
522
523 return true;
524}
525
526static void
527vu_set_enable_all_rings(VuDev *dev, bool enabled)
528{
529 uint16_t i;
530
531 for (i = 0; i < dev->max_queues; i++) {
532 dev->vq[i].enable = enabled;
533 }
534}
535
536static bool
537vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
538{
539 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
540
541 dev->features = vmsg->payload.u64;
542 if (!vu_has_feature(dev, VIRTIO_F_VERSION_1)) {
543
544
545
546
547 vu_panic(dev, "virtio legacy devices aren't supported by libvhost-user");
548 return false;
549 }
550
551 if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
552 vu_set_enable_all_rings(dev, true);
553 }
554
555 if (dev->iface->set_features) {
556 dev->iface->set_features(dev, dev->features);
557 }
558
559 return false;
560}
561
562static bool
563vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
564{
565 return false;
566}
567
568static void
569vu_close_log(VuDev *dev)
570{
571 if (dev->log_table) {
572 if (munmap(dev->log_table, dev->log_size) != 0) {
573 perror("close log munmap() error");
574 }
575
576 dev->log_table = NULL;
577 }
578 if (dev->log_call_fd != -1) {
579 close(dev->log_call_fd);
580 dev->log_call_fd = -1;
581 }
582}
583
584static bool
585vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
586{
587 vu_set_enable_all_rings(dev, false);
588
589 return false;
590}
591
592static bool
593map_ring(VuDev *dev, VuVirtq *vq)
594{
595 vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
596 vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
597 vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
598
599 DPRINT("Setting virtq addresses:\n");
600 DPRINT(" vring_desc at %p\n", vq->vring.desc);
601 DPRINT(" vring_used at %p\n", vq->vring.used);
602 DPRINT(" vring_avail at %p\n", vq->vring.avail);
603
604 return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
605}
606
607static bool
608generate_faults(VuDev *dev) {
609 int i;
610 for (i = 0; i < dev->nregions; i++) {
611 VuDevRegion *dev_region = &dev->regions[i];
612 int ret;
613#ifdef UFFDIO_REGISTER
614
615
616
617
618
619
620
621
622 ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
623 dev_region->size + dev_region->mmap_offset,
624 MADV_DONTNEED);
625 if (ret) {
626 fprintf(stderr,
627 "%s: Failed to madvise(DONTNEED) region %d: %s\n",
628 __func__, i, strerror(errno));
629 }
630
631
632
633
634
635 ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
636 dev_region->size + dev_region->mmap_offset,
637 MADV_NOHUGEPAGE);
638 if (ret) {
639
640
641
642
643 fprintf(stderr,
644 "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
645 __func__, i, strerror(errno));
646 }
647 struct uffdio_register reg_struct;
648 reg_struct.range.start = (uintptr_t)dev_region->mmap_addr;
649 reg_struct.range.len = dev_region->size + dev_region->mmap_offset;
650 reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
651
652 if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, ®_struct)) {
653 vu_panic(dev, "%s: Failed to userfault region %d "
654 "@%p + size:%zx offset: %zx: (ufd=%d)%s\n",
655 __func__, i,
656 dev_region->mmap_addr,
657 dev_region->size, dev_region->mmap_offset,
658 dev->postcopy_ufd, strerror(errno));
659 return false;
660 }
661 if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
662 vu_panic(dev, "%s Region (%d) doesn't support COPY",
663 __func__, i);
664 return false;
665 }
666 DPRINT("%s: region %d: Registered userfault for %"
667 PRIx64 " + %" PRIx64 "\n", __func__, i,
668 (uint64_t)reg_struct.range.start,
669 (uint64_t)reg_struct.range.len);
670
671 if (mprotect((void *)(uintptr_t)dev_region->mmap_addr,
672 dev_region->size + dev_region->mmap_offset,
673 PROT_READ | PROT_WRITE)) {
674 vu_panic(dev, "failed to mprotect region %d for postcopy (%s)",
675 i, strerror(errno));
676 return false;
677 }
678
679#endif
680 }
681
682 return true;
683}
684
685static bool
686vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
687 int i;
688 bool track_ramblocks = dev->postcopy_listening;
689 VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
690 VuDevRegion *dev_region = &dev->regions[dev->nregions];
691 void *mmap_addr;
692
693 if (vmsg->fd_num != 1) {
694 vmsg_close_fds(vmsg);
695 vu_panic(dev, "VHOST_USER_ADD_MEM_REG received %d fds - only 1 fd "
696 "should be sent for this message type", vmsg->fd_num);
697 return false;
698 }
699
700 if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
701 close(vmsg->fds[0]);
702 vu_panic(dev, "VHOST_USER_ADD_MEM_REG requires a message size of at "
703 "least %d bytes and only %d bytes were received",
704 VHOST_USER_MEM_REG_SIZE, vmsg->size);
705 return false;
706 }
707
708 if (dev->nregions == VHOST_USER_MAX_RAM_SLOTS) {
709 close(vmsg->fds[0]);
710 vu_panic(dev, "failing attempt to hot add memory via "
711 "VHOST_USER_ADD_MEM_REG message because the backend has "
712 "no free ram slots available");
713 return false;
714 }
715
716
717
718
719
720
721 if (track_ramblocks &&
722 vmsg->size == sizeof(vmsg->payload.u64) &&
723 vmsg->payload.u64 == 0) {
724 (void)generate_faults(dev);
725 return false;
726 }
727
728 DPRINT("Adding region: %u\n", dev->nregions);
729 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
730 msg_region->guest_phys_addr);
731 DPRINT(" memory_size: 0x%016"PRIx64"\n",
732 msg_region->memory_size);
733 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
734 msg_region->userspace_addr);
735 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
736 msg_region->mmap_offset);
737
738 dev_region->gpa = msg_region->guest_phys_addr;
739 dev_region->size = msg_region->memory_size;
740 dev_region->qva = msg_region->userspace_addr;
741 dev_region->mmap_offset = msg_region->mmap_offset;
742
743
744
745
746
747
748 if (track_ramblocks) {
749
750
751
752
753 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
754 PROT_NONE, MAP_SHARED | MAP_NORESERVE,
755 vmsg->fds[0], 0);
756 } else {
757 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
758 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
759 vmsg->fds[0], 0);
760 }
761
762 if (mmap_addr == MAP_FAILED) {
763 vu_panic(dev, "region mmap error: %s", strerror(errno));
764 } else {
765 dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
766 DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
767 dev_region->mmap_addr);
768 }
769
770 close(vmsg->fds[0]);
771
772 if (track_ramblocks) {
773
774
775
776
777 msg_region->userspace_addr = (uintptr_t)(mmap_addr +
778 dev_region->mmap_offset);
779
780
781 vmsg->fd_num = 0;
782 if (!vu_send_reply(dev, dev->sock, vmsg)) {
783 vu_panic(dev, "failed to respond to add-mem-region for postcopy");
784 return false;
785 }
786
787 DPRINT("Successfully added new region in postcopy\n");
788 dev->nregions++;
789 return false;
790
791 } else {
792 for (i = 0; i < dev->max_queues; i++) {
793 if (dev->vq[i].vring.desc) {
794 if (map_ring(dev, &dev->vq[i])) {
795 vu_panic(dev, "remapping queue %d for new memory region",
796 i);
797 }
798 }
799 }
800
801 DPRINT("Successfully added new region\n");
802 dev->nregions++;
803 vmsg_set_reply_u64(vmsg, 0);
804 return true;
805 }
806}
807
808static inline bool reg_equal(VuDevRegion *vudev_reg,
809 VhostUserMemoryRegion *msg_reg)
810{
811 if (vudev_reg->gpa == msg_reg->guest_phys_addr &&
812 vudev_reg->qva == msg_reg->userspace_addr &&
813 vudev_reg->size == msg_reg->memory_size) {
814 return true;
815 }
816
817 return false;
818}
819
820static bool
821vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
822 VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
823 int i;
824 bool found = false;
825
826 if (vmsg->fd_num != 1) {
827 vmsg_close_fds(vmsg);
828 vu_panic(dev, "VHOST_USER_REM_MEM_REG received %d fds - only 1 fd "
829 "should be sent for this message type", vmsg->fd_num);
830 return false;
831 }
832
833 if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
834 close(vmsg->fds[0]);
835 vu_panic(dev, "VHOST_USER_REM_MEM_REG requires a message size of at "
836 "least %d bytes and only %d bytes were received",
837 VHOST_USER_MEM_REG_SIZE, vmsg->size);
838 return false;
839 }
840
841 DPRINT("Removing region:\n");
842 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
843 msg_region->guest_phys_addr);
844 DPRINT(" memory_size: 0x%016"PRIx64"\n",
845 msg_region->memory_size);
846 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
847 msg_region->userspace_addr);
848 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
849 msg_region->mmap_offset);
850
851 for (i = 0; i < dev->nregions; i++) {
852 if (reg_equal(&dev->regions[i], msg_region)) {
853 VuDevRegion *r = &dev->regions[i];
854 void *m = (void *) (uintptr_t) r->mmap_addr;
855
856 if (m) {
857 munmap(m, r->size + r->mmap_offset);
858 }
859
860
861
862
863
864 memmove(dev->regions + i, dev->regions + i + 1,
865 sizeof(VuDevRegion) * (dev->nregions - i - 1));
866 memset(dev->regions + dev->nregions - 1, 0, sizeof(VuDevRegion));
867 DPRINT("Successfully removed a region\n");
868 dev->nregions--;
869 i--;
870
871 found = true;
872
873
874 }
875 }
876
877 if (found) {
878 vmsg_set_reply_u64(vmsg, 0);
879 } else {
880 vu_panic(dev, "Specified region not found\n");
881 }
882
883 close(vmsg->fds[0]);
884
885 return true;
886}
887
888static bool
889vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
890{
891 int i;
892 VhostUserMemory m = vmsg->payload.memory, *memory = &m;
893 dev->nregions = memory->nregions;
894
895 DPRINT("Nregions: %u\n", memory->nregions);
896 for (i = 0; i < dev->nregions; i++) {
897 void *mmap_addr;
898 VhostUserMemoryRegion *msg_region = &memory->regions[i];
899 VuDevRegion *dev_region = &dev->regions[i];
900
901 DPRINT("Region %d\n", i);
902 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
903 msg_region->guest_phys_addr);
904 DPRINT(" memory_size: 0x%016"PRIx64"\n",
905 msg_region->memory_size);
906 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
907 msg_region->userspace_addr);
908 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
909 msg_region->mmap_offset);
910
911 dev_region->gpa = msg_region->guest_phys_addr;
912 dev_region->size = msg_region->memory_size;
913 dev_region->qva = msg_region->userspace_addr;
914 dev_region->mmap_offset = msg_region->mmap_offset;
915
916
917
918
919
920
921
922 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
923 PROT_NONE, MAP_SHARED | MAP_NORESERVE,
924 vmsg->fds[i], 0);
925
926 if (mmap_addr == MAP_FAILED) {
927 vu_panic(dev, "region mmap error: %s", strerror(errno));
928 } else {
929 dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
930 DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
931 dev_region->mmap_addr);
932 }
933
934
935
936
937 msg_region->userspace_addr = (uintptr_t)(mmap_addr +
938 dev_region->mmap_offset);
939 close(vmsg->fds[i]);
940 }
941
942
943 vmsg->fd_num = 0;
944 if (!vu_send_reply(dev, dev->sock, vmsg)) {
945 vu_panic(dev, "failed to respond to set-mem-table for postcopy");
946 return false;
947 }
948
949
950
951
952 if (!dev->read_msg(dev, dev->sock, vmsg) ||
953 vmsg->size != sizeof(vmsg->payload.u64) ||
954 vmsg->payload.u64 != 0) {
955 vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
956 return false;
957 }
958
959
960 (void)generate_faults(dev);
961
962 return false;
963}
964
965static bool
966vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
967{
968 int i;
969 VhostUserMemory m = vmsg->payload.memory, *memory = &m;
970
971 for (i = 0; i < dev->nregions; i++) {
972 VuDevRegion *r = &dev->regions[i];
973 void *m = (void *) (uintptr_t) r->mmap_addr;
974
975 if (m) {
976 munmap(m, r->size + r->mmap_offset);
977 }
978 }
979 dev->nregions = memory->nregions;
980
981 if (dev->postcopy_listening) {
982 return vu_set_mem_table_exec_postcopy(dev, vmsg);
983 }
984
985 DPRINT("Nregions: %u\n", memory->nregions);
986 for (i = 0; i < dev->nregions; i++) {
987 void *mmap_addr;
988 VhostUserMemoryRegion *msg_region = &memory->regions[i];
989 VuDevRegion *dev_region = &dev->regions[i];
990
991 DPRINT("Region %d\n", i);
992 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
993 msg_region->guest_phys_addr);
994 DPRINT(" memory_size: 0x%016"PRIx64"\n",
995 msg_region->memory_size);
996 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
997 msg_region->userspace_addr);
998 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
999 msg_region->mmap_offset);
1000
1001 dev_region->gpa = msg_region->guest_phys_addr;
1002 dev_region->size = msg_region->memory_size;
1003 dev_region->qva = msg_region->userspace_addr;
1004 dev_region->mmap_offset = msg_region->mmap_offset;
1005
1006
1007
1008
1009 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
1010 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
1011 vmsg->fds[i], 0);
1012
1013 if (mmap_addr == MAP_FAILED) {
1014 vu_panic(dev, "region mmap error: %s", strerror(errno));
1015 } else {
1016 dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
1017 DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
1018 dev_region->mmap_addr);
1019 }
1020
1021 close(vmsg->fds[i]);
1022 }
1023
1024 for (i = 0; i < dev->max_queues; i++) {
1025 if (dev->vq[i].vring.desc) {
1026 if (map_ring(dev, &dev->vq[i])) {
1027 vu_panic(dev, "remapping queue %d during setmemtable", i);
1028 }
1029 }
1030 }
1031
1032 return false;
1033}
1034
1035static bool
1036vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
1037{
1038 int fd;
1039 uint64_t log_mmap_size, log_mmap_offset;
1040 void *rc;
1041
1042 if (vmsg->fd_num != 1 ||
1043 vmsg->size != sizeof(vmsg->payload.log)) {
1044 vu_panic(dev, "Invalid log_base message");
1045 return true;
1046 }
1047
1048 fd = vmsg->fds[0];
1049 log_mmap_offset = vmsg->payload.log.mmap_offset;
1050 log_mmap_size = vmsg->payload.log.mmap_size;
1051 DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
1052 DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
1053
1054 rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
1055 log_mmap_offset);
1056 close(fd);
1057 if (rc == MAP_FAILED) {
1058 perror("log mmap error");
1059 }
1060
1061 if (dev->log_table) {
1062 munmap(dev->log_table, dev->log_size);
1063 }
1064 dev->log_table = rc;
1065 dev->log_size = log_mmap_size;
1066
1067 vmsg->size = sizeof(vmsg->payload.u64);
1068 vmsg->fd_num = 0;
1069
1070 return true;
1071}
1072
1073static bool
1074vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
1075{
1076 if (vmsg->fd_num != 1) {
1077 vu_panic(dev, "Invalid log_fd message");
1078 return false;
1079 }
1080
1081 if (dev->log_call_fd != -1) {
1082 close(dev->log_call_fd);
1083 }
1084 dev->log_call_fd = vmsg->fds[0];
1085 DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
1086
1087 return false;
1088}
1089
1090static bool
1091vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
1092{
1093 unsigned int index = vmsg->payload.state.index;
1094 unsigned int num = vmsg->payload.state.num;
1095
1096 DPRINT("State.index: %u\n", index);
1097 DPRINT("State.num: %u\n", num);
1098 dev->vq[index].vring.num = num;
1099
1100 return false;
1101}
1102
1103static bool
1104vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
1105{
1106 struct vhost_vring_addr addr = vmsg->payload.addr, *vra = &addr;
1107 unsigned int index = vra->index;
1108 VuVirtq *vq = &dev->vq[index];
1109
1110 DPRINT("vhost_vring_addr:\n");
1111 DPRINT(" index: %d\n", vra->index);
1112 DPRINT(" flags: %d\n", vra->flags);
1113 DPRINT(" desc_user_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->desc_user_addr);
1114 DPRINT(" used_user_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->used_user_addr);
1115 DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->avail_user_addr);
1116 DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->log_guest_addr);
1117
1118 vq->vra = *vra;
1119 vq->vring.flags = vra->flags;
1120 vq->vring.log_guest_addr = vra->log_guest_addr;
1121
1122
1123 if (map_ring(dev, vq)) {
1124 vu_panic(dev, "Invalid vring_addr message");
1125 return false;
1126 }
1127
1128 vq->used_idx = le16toh(vq->vring.used->idx);
1129
1130 if (vq->last_avail_idx != vq->used_idx) {
1131 bool resume = dev->iface->queue_is_processed_in_order &&
1132 dev->iface->queue_is_processed_in_order(dev, index);
1133
1134 DPRINT("Last avail index != used index: %u != %u%s\n",
1135 vq->last_avail_idx, vq->used_idx,
1136 resume ? ", resuming" : "");
1137
1138 if (resume) {
1139 vq->shadow_avail_idx = vq->last_avail_idx = vq->used_idx;
1140 }
1141 }
1142
1143 return false;
1144}
1145
1146static bool
1147vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
1148{
1149 unsigned int index = vmsg->payload.state.index;
1150 unsigned int num = vmsg->payload.state.num;
1151
1152 DPRINT("State.index: %u\n", index);
1153 DPRINT("State.num: %u\n", num);
1154 dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
1155
1156 return false;
1157}
1158
1159static bool
1160vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
1161{
1162 unsigned int index = vmsg->payload.state.index;
1163
1164 DPRINT("State.index: %u\n", index);
1165 vmsg->payload.state.num = dev->vq[index].last_avail_idx;
1166 vmsg->size = sizeof(vmsg->payload.state);
1167
1168 dev->vq[index].started = false;
1169 if (dev->iface->queue_set_started) {
1170 dev->iface->queue_set_started(dev, index, false);
1171 }
1172
1173 if (dev->vq[index].call_fd != -1) {
1174 close(dev->vq[index].call_fd);
1175 dev->vq[index].call_fd = -1;
1176 }
1177 if (dev->vq[index].kick_fd != -1) {
1178 dev->remove_watch(dev, dev->vq[index].kick_fd);
1179 close(dev->vq[index].kick_fd);
1180 dev->vq[index].kick_fd = -1;
1181 }
1182
1183 return true;
1184}
1185
1186static bool
1187vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
1188{
1189 int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
1190 bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
1191
1192 if (index >= dev->max_queues) {
1193 vmsg_close_fds(vmsg);
1194 vu_panic(dev, "Invalid queue index: %u", index);
1195 return false;
1196 }
1197
1198 if (nofd) {
1199 vmsg_close_fds(vmsg);
1200 return true;
1201 }
1202
1203 if (vmsg->fd_num != 1) {
1204 vmsg_close_fds(vmsg);
1205 vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
1206 return false;
1207 }
1208
1209 return true;
1210}
1211
1212static int
1213inflight_desc_compare(const void *a, const void *b)
1214{
1215 VuVirtqInflightDesc *desc0 = (VuVirtqInflightDesc *)a,
1216 *desc1 = (VuVirtqInflightDesc *)b;
1217
1218 if (desc1->counter > desc0->counter &&
1219 (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
1220 return 1;
1221 }
1222
1223 return -1;
1224}
1225
1226static int
1227vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
1228{
1229 int i = 0;
1230
1231 if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
1232 return 0;
1233 }
1234
1235 if (unlikely(!vq->inflight)) {
1236 return -1;
1237 }
1238
1239 if (unlikely(!vq->inflight->version)) {
1240
1241 vq->inflight->version = INFLIGHT_VERSION;
1242 return 0;
1243 }
1244
1245 vq->used_idx = le16toh(vq->vring.used->idx);
1246 vq->resubmit_num = 0;
1247 vq->resubmit_list = NULL;
1248 vq->counter = 0;
1249
1250 if (unlikely(vq->inflight->used_idx != vq->used_idx)) {
1251 vq->inflight->desc[vq->inflight->last_batch_head].inflight = 0;
1252
1253 barrier();
1254
1255 vq->inflight->used_idx = vq->used_idx;
1256 }
1257
1258 for (i = 0; i < vq->inflight->desc_num; i++) {
1259 if (vq->inflight->desc[i].inflight == 1) {
1260 vq->inuse++;
1261 }
1262 }
1263
1264 vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;
1265
1266 if (vq->inuse) {
1267 vq->resubmit_list = calloc(vq->inuse, sizeof(VuVirtqInflightDesc));
1268 if (!vq->resubmit_list) {
1269 return -1;
1270 }
1271
1272 for (i = 0; i < vq->inflight->desc_num; i++) {
1273 if (vq->inflight->desc[i].inflight) {
1274 vq->resubmit_list[vq->resubmit_num].index = i;
1275 vq->resubmit_list[vq->resubmit_num].counter =
1276 vq->inflight->desc[i].counter;
1277 vq->resubmit_num++;
1278 }
1279 }
1280
1281 if (vq->resubmit_num > 1) {
1282 qsort(vq->resubmit_list, vq->resubmit_num,
1283 sizeof(VuVirtqInflightDesc), inflight_desc_compare);
1284 }
1285 vq->counter = vq->resubmit_list[0].counter + 1;
1286 }
1287
1288
1289 if (eventfd_write(vq->kick_fd, 1)) {
1290 return -1;
1291 }
1292
1293 return 0;
1294}
1295
1296static bool
1297vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
1298{
1299 int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
1300 bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
1301
1302 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1303
1304 if (!vu_check_queue_msg_file(dev, vmsg)) {
1305 return false;
1306 }
1307
1308 if (dev->vq[index].kick_fd != -1) {
1309 dev->remove_watch(dev, dev->vq[index].kick_fd);
1310 close(dev->vq[index].kick_fd);
1311 dev->vq[index].kick_fd = -1;
1312 }
1313
1314 dev->vq[index].kick_fd = nofd ? -1 : vmsg->fds[0];
1315 DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index);
1316
1317 dev->vq[index].started = true;
1318 if (dev->iface->queue_set_started) {
1319 dev->iface->queue_set_started(dev, index, true);
1320 }
1321
1322 if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
1323 dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
1324 vu_kick_cb, (void *)(long)index);
1325
1326 DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
1327 dev->vq[index].kick_fd, index);
1328 }
1329
1330 if (vu_check_queue_inflights(dev, &dev->vq[index])) {
1331 vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
1332 }
1333
1334 return false;
1335}
1336
1337void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
1338 vu_queue_handler_cb handler)
1339{
1340 int qidx = vq - dev->vq;
1341
1342 vq->handler = handler;
1343 if (vq->kick_fd >= 0) {
1344 if (handler) {
1345 dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
1346 vu_kick_cb, (void *)(long)qidx);
1347 } else {
1348 dev->remove_watch(dev, vq->kick_fd);
1349 }
1350 }
1351}
1352
1353bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
1354 int size, int offset)
1355{
1356 int qidx = vq - dev->vq;
1357 int fd_num = 0;
1358 VhostUserMsg vmsg = {
1359 .request = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
1360 .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1361 .size = sizeof(vmsg.payload.area),
1362 .payload.area = {
1363 .u64 = qidx & VHOST_USER_VRING_IDX_MASK,
1364 .size = size,
1365 .offset = offset,
1366 },
1367 };
1368
1369 if (fd == -1) {
1370 vmsg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
1371 } else {
1372 vmsg.fds[fd_num++] = fd;
1373 }
1374
1375 vmsg.fd_num = fd_num;
1376
1377 if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) {
1378 return false;
1379 }
1380
1381 pthread_mutex_lock(&dev->slave_mutex);
1382 if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
1383 pthread_mutex_unlock(&dev->slave_mutex);
1384 return false;
1385 }
1386
1387
1388 return vu_process_message_reply(dev, &vmsg);
1389}
1390
1391static bool
1392vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
1393{
1394 int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
1395 bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
1396
1397 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1398
1399 if (!vu_check_queue_msg_file(dev, vmsg)) {
1400 return false;
1401 }
1402
1403 if (dev->vq[index].call_fd != -1) {
1404 close(dev->vq[index].call_fd);
1405 dev->vq[index].call_fd = -1;
1406 }
1407
1408 dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0];
1409
1410
1411 if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) {
1412 return -1;
1413 }
1414
1415 DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index);
1416
1417 return false;
1418}
1419
1420static bool
1421vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
1422{
1423 int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
1424 bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
1425
1426 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1427
1428 if (!vu_check_queue_msg_file(dev, vmsg)) {
1429 return false;
1430 }
1431
1432 if (dev->vq[index].err_fd != -1) {
1433 close(dev->vq[index].err_fd);
1434 dev->vq[index].err_fd = -1;
1435 }
1436
1437 dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0];
1438
1439 return false;
1440}
1441
1442static bool
1443vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
1444{
1445
1446
1447
1448
1449
1450
1451
1452
1453 uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
1454 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1455 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ |
1456 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
1457 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD |
1458 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
1459 1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS;
1460
1461 if (have_userfault()) {
1462 features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
1463 }
1464
1465 if (dev->iface->get_config && dev->iface->set_config) {
1466 features |= 1ULL << VHOST_USER_PROTOCOL_F_CONFIG;
1467 }
1468
1469 if (dev->iface->get_protocol_features) {
1470 features |= dev->iface->get_protocol_features(dev);
1471 }
1472
1473 vmsg_set_reply_u64(vmsg, features);
1474 return true;
1475}
1476
1477static bool
1478vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
1479{
1480 uint64_t features = vmsg->payload.u64;
1481
1482 DPRINT("u64: 0x%016"PRIx64"\n", features);
1483
1484 dev->protocol_features = vmsg->payload.u64;
1485
1486 if (vu_has_protocol_feature(dev,
1487 VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
1488 (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ) ||
1489 !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500 vu_panic(dev,
1501 "F_IN_BAND_NOTIFICATIONS requires F_SLAVE_REQ && F_REPLY_ACK");
1502 return false;
1503 }
1504
1505 if (dev->iface->set_protocol_features) {
1506 dev->iface->set_protocol_features(dev, features);
1507 }
1508
1509 return false;
1510}
1511
1512static bool
1513vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
1514{
1515 vmsg_set_reply_u64(vmsg, dev->max_queues);
1516 return true;
1517}
1518
1519static bool
1520vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
1521{
1522 unsigned int index = vmsg->payload.state.index;
1523 unsigned int enable = vmsg->payload.state.num;
1524
1525 DPRINT("State.index: %u\n", index);
1526 DPRINT("State.enable: %u\n", enable);
1527
1528 if (index >= dev->max_queues) {
1529 vu_panic(dev, "Invalid vring_enable index: %u", index);
1530 return false;
1531 }
1532
1533 dev->vq[index].enable = enable;
1534 return false;
1535}
1536
1537static bool
1538vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg)
1539{
1540 if (vmsg->fd_num != 1) {
1541 vu_panic(dev, "Invalid slave_req_fd message (%d fd's)", vmsg->fd_num);
1542 return false;
1543 }
1544
1545 if (dev->slave_fd != -1) {
1546 close(dev->slave_fd);
1547 }
1548 dev->slave_fd = vmsg->fds[0];
1549 DPRINT("Got slave_fd: %d\n", vmsg->fds[0]);
1550
1551 return false;
1552}
1553
1554static bool
1555vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
1556{
1557 int ret = -1;
1558
1559 if (dev->iface->get_config) {
1560 ret = dev->iface->get_config(dev, vmsg->payload.config.region,
1561 vmsg->payload.config.size);
1562 }
1563
1564 if (ret) {
1565
1566 vmsg->size = 0;
1567 }
1568
1569 return true;
1570}
1571
1572static bool
1573vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
1574{
1575 int ret = -1;
1576
1577 if (dev->iface->set_config) {
1578 ret = dev->iface->set_config(dev, vmsg->payload.config.region,
1579 vmsg->payload.config.offset,
1580 vmsg->payload.config.size,
1581 vmsg->payload.config.flags);
1582 if (ret) {
1583 vu_panic(dev, "Set virtio configuration space failed");
1584 }
1585 }
1586
1587 return false;
1588}
1589
1590static bool
1591vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg)
1592{
1593 dev->postcopy_ufd = -1;
1594#ifdef UFFDIO_API
1595 struct uffdio_api api_struct;
1596
1597 dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
1598 vmsg->size = 0;
1599#endif
1600
1601 if (dev->postcopy_ufd == -1) {
1602 vu_panic(dev, "Userfaultfd not available: %s", strerror(errno));
1603 goto out;
1604 }
1605
1606#ifdef UFFDIO_API
1607 api_struct.api = UFFD_API;
1608 api_struct.features = 0;
1609 if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
1610 vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno));
1611 close(dev->postcopy_ufd);
1612 dev->postcopy_ufd = -1;
1613 goto out;
1614 }
1615
1616#endif
1617
1618out:
1619
1620 vmsg->fd_num = 1;
1621 vmsg->fds[0] = dev->postcopy_ufd;
1622 return true;
1623}
1624
1625static bool
1626vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg)
1627{
1628 if (dev->nregions) {
1629 vu_panic(dev, "Regions already registered at postcopy-listen");
1630 vmsg_set_reply_u64(vmsg, -1);
1631 return true;
1632 }
1633 dev->postcopy_listening = true;
1634
1635 vmsg_set_reply_u64(vmsg, 0);
1636 return true;
1637}
1638
1639static bool
1640vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
1641{
1642 DPRINT("%s: Entry\n", __func__);
1643 dev->postcopy_listening = false;
1644 if (dev->postcopy_ufd > 0) {
1645 close(dev->postcopy_ufd);
1646 dev->postcopy_ufd = -1;
1647 DPRINT("%s: Done close\n", __func__);
1648 }
1649
1650 vmsg_set_reply_u64(vmsg, 0);
1651 DPRINT("%s: exit\n", __func__);
1652 return true;
1653}
1654
1655static inline uint64_t
1656vu_inflight_queue_size(uint16_t queue_size)
1657{
1658 return ALIGN_UP(sizeof(VuDescStateSplit) * queue_size +
1659 sizeof(uint16_t), INFLIGHT_ALIGNMENT);
1660}
1661
1662#ifdef MFD_ALLOW_SEALING
1663static void *
1664memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
1665{
1666 void *ptr;
1667 int ret;
1668
1669 *fd = memfd_create(name, MFD_ALLOW_SEALING);
1670 if (*fd < 0) {
1671 return NULL;
1672 }
1673
1674 ret = ftruncate(*fd, size);
1675 if (ret < 0) {
1676 close(*fd);
1677 return NULL;
1678 }
1679
1680 ret = fcntl(*fd, F_ADD_SEALS, flags);
1681 if (ret < 0) {
1682 close(*fd);
1683 return NULL;
1684 }
1685
1686 ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
1687 if (ptr == MAP_FAILED) {
1688 close(*fd);
1689 return NULL;
1690 }
1691
1692 return ptr;
1693}
1694#endif
1695
1696static bool
1697vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
1698{
1699 int fd = -1;
1700 void *addr = NULL;
1701 uint64_t mmap_size;
1702 uint16_t num_queues, queue_size;
1703
1704 if (vmsg->size != sizeof(vmsg->payload.inflight)) {
1705 vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
1706 vmsg->payload.inflight.mmap_size = 0;
1707 return true;
1708 }
1709
1710 num_queues = vmsg->payload.inflight.num_queues;
1711 queue_size = vmsg->payload.inflight.queue_size;
1712
1713 DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
1714 DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
1715
1716 mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
1717
1718#ifdef MFD_ALLOW_SEALING
1719 addr = memfd_alloc("vhost-inflight", mmap_size,
1720 F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
1721 &fd);
1722#else
1723 vu_panic(dev, "Not implemented: memfd support is missing");
1724#endif
1725
1726 if (!addr) {
1727 vu_panic(dev, "Failed to alloc vhost inflight area");
1728 vmsg->payload.inflight.mmap_size = 0;
1729 return true;
1730 }
1731
1732 memset(addr, 0, mmap_size);
1733
1734 dev->inflight_info.addr = addr;
1735 dev->inflight_info.size = vmsg->payload.inflight.mmap_size = mmap_size;
1736 dev->inflight_info.fd = vmsg->fds[0] = fd;
1737 vmsg->fd_num = 1;
1738 vmsg->payload.inflight.mmap_offset = 0;
1739
1740 DPRINT("send inflight mmap_size: %"PRId64"\n",
1741 vmsg->payload.inflight.mmap_size);
1742 DPRINT("send inflight mmap offset: %"PRId64"\n",
1743 vmsg->payload.inflight.mmap_offset);
1744
1745 return true;
1746}
1747
1748static bool
1749vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
1750{
1751 int fd, i;
1752 uint64_t mmap_size, mmap_offset;
1753 uint16_t num_queues, queue_size;
1754 void *rc;
1755
1756 if (vmsg->fd_num != 1 ||
1757 vmsg->size != sizeof(vmsg->payload.inflight)) {
1758 vu_panic(dev, "Invalid set_inflight_fd message size:%d fds:%d",
1759 vmsg->size, vmsg->fd_num);
1760 return false;
1761 }
1762
1763 fd = vmsg->fds[0];
1764 mmap_size = vmsg->payload.inflight.mmap_size;
1765 mmap_offset = vmsg->payload.inflight.mmap_offset;
1766 num_queues = vmsg->payload.inflight.num_queues;
1767 queue_size = vmsg->payload.inflight.queue_size;
1768
1769 DPRINT("set_inflight_fd mmap_size: %"PRId64"\n", mmap_size);
1770 DPRINT("set_inflight_fd mmap_offset: %"PRId64"\n", mmap_offset);
1771 DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
1772 DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
1773
1774 rc = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1775 fd, mmap_offset);
1776
1777 if (rc == MAP_FAILED) {
1778 vu_panic(dev, "set_inflight_fd mmap error: %s", strerror(errno));
1779 return false;
1780 }
1781
1782 if (dev->inflight_info.fd) {
1783 close(dev->inflight_info.fd);
1784 }
1785
1786 if (dev->inflight_info.addr) {
1787 munmap(dev->inflight_info.addr, dev->inflight_info.size);
1788 }
1789
1790 dev->inflight_info.fd = fd;
1791 dev->inflight_info.addr = rc;
1792 dev->inflight_info.size = mmap_size;
1793
1794 for (i = 0; i < num_queues; i++) {
1795 dev->vq[i].inflight = (VuVirtqInflight *)rc;
1796 dev->vq[i].inflight->desc_num = queue_size;
1797 rc = (void *)((char *)rc + vu_inflight_queue_size(queue_size));
1798 }
1799
1800 return false;
1801}
1802
1803static bool
1804vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg)
1805{
1806 unsigned int index = vmsg->payload.state.index;
1807
1808 if (index >= dev->max_queues) {
1809 vu_panic(dev, "Invalid queue index: %u", index);
1810 return false;
1811 }
1812
1813 DPRINT("Got kick message: handler:%p idx:%u\n",
1814 dev->vq[index].handler, index);
1815
1816 if (!dev->vq[index].started) {
1817 dev->vq[index].started = true;
1818
1819 if (dev->iface->queue_set_started) {
1820 dev->iface->queue_set_started(dev, index, true);
1821 }
1822 }
1823
1824 if (dev->vq[index].handler) {
1825 dev->vq[index].handler(dev, index);
1826 }
1827
1828 return false;
1829}
1830
1831static bool vu_handle_get_max_memslots(VuDev *dev, VhostUserMsg *vmsg)
1832{
1833 vmsg->flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
1834 vmsg->size = sizeof(vmsg->payload.u64);
1835 vmsg->payload.u64 = VHOST_USER_MAX_RAM_SLOTS;
1836 vmsg->fd_num = 0;
1837
1838 if (!vu_message_write(dev, dev->sock, vmsg)) {
1839 vu_panic(dev, "Failed to send max ram slots: %s\n", strerror(errno));
1840 }
1841
1842 DPRINT("u64: 0x%016"PRIx64"\n", (uint64_t) VHOST_USER_MAX_RAM_SLOTS);
1843
1844 return false;
1845}
1846
1847static bool
1848vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
1849{
1850 int do_reply = 0;
1851
1852
1853 DPRINT("================ Vhost user message ================\n");
1854 DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
1855 vmsg->request);
1856 DPRINT("Flags: 0x%x\n", vmsg->flags);
1857 DPRINT("Size: %u\n", vmsg->size);
1858
1859 if (vmsg->fd_num) {
1860 int i;
1861 DPRINT("Fds:");
1862 for (i = 0; i < vmsg->fd_num; i++) {
1863 DPRINT(" %d", vmsg->fds[i]);
1864 }
1865 DPRINT("\n");
1866 }
1867
1868 if (dev->iface->process_msg &&
1869 dev->iface->process_msg(dev, vmsg, &do_reply)) {
1870 return do_reply;
1871 }
1872
1873 switch (vmsg->request) {
1874 case VHOST_USER_GET_FEATURES:
1875 return vu_get_features_exec(dev, vmsg);
1876 case VHOST_USER_SET_FEATURES:
1877 return vu_set_features_exec(dev, vmsg);
1878 case VHOST_USER_GET_PROTOCOL_FEATURES:
1879 return vu_get_protocol_features_exec(dev, vmsg);
1880 case VHOST_USER_SET_PROTOCOL_FEATURES:
1881 return vu_set_protocol_features_exec(dev, vmsg);
1882 case VHOST_USER_SET_OWNER:
1883 return vu_set_owner_exec(dev, vmsg);
1884 case VHOST_USER_RESET_OWNER:
1885 return vu_reset_device_exec(dev, vmsg);
1886 case VHOST_USER_SET_MEM_TABLE:
1887 return vu_set_mem_table_exec(dev, vmsg);
1888 case VHOST_USER_SET_LOG_BASE:
1889 return vu_set_log_base_exec(dev, vmsg);
1890 case VHOST_USER_SET_LOG_FD:
1891 return vu_set_log_fd_exec(dev, vmsg);
1892 case VHOST_USER_SET_VRING_NUM:
1893 return vu_set_vring_num_exec(dev, vmsg);
1894 case VHOST_USER_SET_VRING_ADDR:
1895 return vu_set_vring_addr_exec(dev, vmsg);
1896 case VHOST_USER_SET_VRING_BASE:
1897 return vu_set_vring_base_exec(dev, vmsg);
1898 case VHOST_USER_GET_VRING_BASE:
1899 return vu_get_vring_base_exec(dev, vmsg);
1900 case VHOST_USER_SET_VRING_KICK:
1901 return vu_set_vring_kick_exec(dev, vmsg);
1902 case VHOST_USER_SET_VRING_CALL:
1903 return vu_set_vring_call_exec(dev, vmsg);
1904 case VHOST_USER_SET_VRING_ERR:
1905 return vu_set_vring_err_exec(dev, vmsg);
1906 case VHOST_USER_GET_QUEUE_NUM:
1907 return vu_get_queue_num_exec(dev, vmsg);
1908 case VHOST_USER_SET_VRING_ENABLE:
1909 return vu_set_vring_enable_exec(dev, vmsg);
1910 case VHOST_USER_SET_SLAVE_REQ_FD:
1911 return vu_set_slave_req_fd(dev, vmsg);
1912 case VHOST_USER_GET_CONFIG:
1913 return vu_get_config(dev, vmsg);
1914 case VHOST_USER_SET_CONFIG:
1915 return vu_set_config(dev, vmsg);
1916 case VHOST_USER_NONE:
1917
1918 exit(0);
1919 case VHOST_USER_POSTCOPY_ADVISE:
1920 return vu_set_postcopy_advise(dev, vmsg);
1921 case VHOST_USER_POSTCOPY_LISTEN:
1922 return vu_set_postcopy_listen(dev, vmsg);
1923 case VHOST_USER_POSTCOPY_END:
1924 return vu_set_postcopy_end(dev, vmsg);
1925 case VHOST_USER_GET_INFLIGHT_FD:
1926 return vu_get_inflight_fd(dev, vmsg);
1927 case VHOST_USER_SET_INFLIGHT_FD:
1928 return vu_set_inflight_fd(dev, vmsg);
1929 case VHOST_USER_VRING_KICK:
1930 return vu_handle_vring_kick(dev, vmsg);
1931 case VHOST_USER_GET_MAX_MEM_SLOTS:
1932 return vu_handle_get_max_memslots(dev, vmsg);
1933 case VHOST_USER_ADD_MEM_REG:
1934 return vu_add_mem_reg(dev, vmsg);
1935 case VHOST_USER_REM_MEM_REG:
1936 return vu_rem_mem_reg(dev, vmsg);
1937 default:
1938 vmsg_close_fds(vmsg);
1939 vu_panic(dev, "Unhandled request: %d", vmsg->request);
1940 }
1941
1942 return false;
1943}
1944
1945bool
1946vu_dispatch(VuDev *dev)
1947{
1948 VhostUserMsg vmsg = { 0, };
1949 int reply_requested;
1950 bool need_reply, success = false;
1951
1952 if (!dev->read_msg(dev, dev->sock, &vmsg)) {
1953 goto end;
1954 }
1955
1956 need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK;
1957
1958 reply_requested = vu_process_message(dev, &vmsg);
1959 if (!reply_requested && need_reply) {
1960 vmsg_set_reply_u64(&vmsg, 0);
1961 reply_requested = 1;
1962 }
1963
1964 if (!reply_requested) {
1965 success = true;
1966 goto end;
1967 }
1968
1969 if (!vu_send_reply(dev, dev->sock, &vmsg)) {
1970 goto end;
1971 }
1972
1973 success = true;
1974
1975end:
1976 free(vmsg.data);
1977 return success;
1978}
1979
1980void
1981vu_deinit(VuDev *dev)
1982{
1983 int i;
1984
1985 for (i = 0; i < dev->nregions; i++) {
1986 VuDevRegion *r = &dev->regions[i];
1987 void *m = (void *) (uintptr_t) r->mmap_addr;
1988 if (m != MAP_FAILED) {
1989 munmap(m, r->size + r->mmap_offset);
1990 }
1991 }
1992 dev->nregions = 0;
1993
1994 for (i = 0; i < dev->max_queues; i++) {
1995 VuVirtq *vq = &dev->vq[i];
1996
1997 if (vq->call_fd != -1) {
1998 close(vq->call_fd);
1999 vq->call_fd = -1;
2000 }
2001
2002 if (vq->kick_fd != -1) {
2003 dev->remove_watch(dev, vq->kick_fd);
2004 close(vq->kick_fd);
2005 vq->kick_fd = -1;
2006 }
2007
2008 if (vq->err_fd != -1) {
2009 close(vq->err_fd);
2010 vq->err_fd = -1;
2011 }
2012
2013 if (vq->resubmit_list) {
2014 free(vq->resubmit_list);
2015 vq->resubmit_list = NULL;
2016 }
2017
2018 vq->inflight = NULL;
2019 }
2020
2021 if (dev->inflight_info.addr) {
2022 munmap(dev->inflight_info.addr, dev->inflight_info.size);
2023 dev->inflight_info.addr = NULL;
2024 }
2025
2026 if (dev->inflight_info.fd > 0) {
2027 close(dev->inflight_info.fd);
2028 dev->inflight_info.fd = -1;
2029 }
2030
2031 vu_close_log(dev);
2032 if (dev->slave_fd != -1) {
2033 close(dev->slave_fd);
2034 dev->slave_fd = -1;
2035 }
2036 pthread_mutex_destroy(&dev->slave_mutex);
2037
2038 if (dev->sock != -1) {
2039 close(dev->sock);
2040 }
2041
2042 free(dev->vq);
2043 dev->vq = NULL;
2044}
2045
2046bool
2047vu_init(VuDev *dev,
2048 uint16_t max_queues,
2049 int socket,
2050 vu_panic_cb panic,
2051 vu_read_msg_cb read_msg,
2052 vu_set_watch_cb set_watch,
2053 vu_remove_watch_cb remove_watch,
2054 const VuDevIface *iface)
2055{
2056 uint16_t i;
2057
2058 assert(max_queues > 0);
2059 assert(socket >= 0);
2060 assert(set_watch);
2061 assert(remove_watch);
2062 assert(iface);
2063 assert(panic);
2064
2065 memset(dev, 0, sizeof(*dev));
2066
2067 dev->sock = socket;
2068 dev->panic = panic;
2069 dev->read_msg = read_msg ? read_msg : vu_message_read_default;
2070 dev->set_watch = set_watch;
2071 dev->remove_watch = remove_watch;
2072 dev->iface = iface;
2073 dev->log_call_fd = -1;
2074 pthread_mutex_init(&dev->slave_mutex, NULL);
2075 dev->slave_fd = -1;
2076 dev->max_queues = max_queues;
2077
2078 dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
2079 if (!dev->vq) {
2080 DPRINT("%s: failed to malloc virtqueues\n", __func__);
2081 return false;
2082 }
2083
2084 for (i = 0; i < max_queues; i++) {
2085 dev->vq[i] = (VuVirtq) {
2086 .call_fd = -1, .kick_fd = -1, .err_fd = -1,
2087 .notification = true,
2088 };
2089 }
2090
2091 return true;
2092}
2093
2094VuVirtq *
2095vu_get_queue(VuDev *dev, int qidx)
2096{
2097 assert(qidx < dev->max_queues);
2098 return &dev->vq[qidx];
2099}
2100
2101bool
2102vu_queue_enabled(VuDev *dev, VuVirtq *vq)
2103{
2104 return vq->enable;
2105}
2106
2107bool
2108vu_queue_started(const VuDev *dev, const VuVirtq *vq)
2109{
2110 return vq->started;
2111}
2112
2113static inline uint16_t
2114vring_avail_flags(VuVirtq *vq)
2115{
2116 return le16toh(vq->vring.avail->flags);
2117}
2118
2119static inline uint16_t
2120vring_avail_idx(VuVirtq *vq)
2121{
2122 vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
2123
2124 return vq->shadow_avail_idx;
2125}
2126
2127static inline uint16_t
2128vring_avail_ring(VuVirtq *vq, int i)
2129{
2130 return le16toh(vq->vring.avail->ring[i]);
2131}
2132
2133static inline uint16_t
2134vring_get_used_event(VuVirtq *vq)
2135{
2136 return vring_avail_ring(vq, vq->vring.num);
2137}
2138
2139static int
2140virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
2141{
2142 uint16_t num_heads = vring_avail_idx(vq) - idx;
2143
2144
2145 if (num_heads > vq->vring.num) {
2146 vu_panic(dev, "Guest moved used index from %u to %u",
2147 idx, vq->shadow_avail_idx);
2148 return -1;
2149 }
2150 if (num_heads) {
2151
2152
2153 smp_rmb();
2154 }
2155
2156 return num_heads;
2157}
2158
2159static bool
2160virtqueue_get_head(VuDev *dev, VuVirtq *vq,
2161 unsigned int idx, unsigned int *head)
2162{
2163
2164
2165 *head = vring_avail_ring(vq, idx % vq->vring.num);
2166
2167
2168 if (*head >= vq->vring.num) {
2169 vu_panic(dev, "Guest says index %u is available", *head);
2170 return false;
2171 }
2172
2173 return true;
2174}
2175
2176static int
2177virtqueue_read_indirect_desc(VuDev *dev, struct vring_desc *desc,
2178 uint64_t addr, size_t len)
2179{
2180 struct vring_desc *ori_desc;
2181 uint64_t read_len;
2182
2183 if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
2184 return -1;
2185 }
2186
2187 if (len == 0) {
2188 return -1;
2189 }
2190
2191 while (len) {
2192 read_len = len;
2193 ori_desc = vu_gpa_to_va(dev, &read_len, addr);
2194 if (!ori_desc) {
2195 return -1;
2196 }
2197
2198 memcpy(desc, ori_desc, read_len);
2199 len -= read_len;
2200 addr += read_len;
2201 desc += read_len;
2202 }
2203
2204 return 0;
2205}
2206
2207enum {
2208 VIRTQUEUE_READ_DESC_ERROR = -1,
2209 VIRTQUEUE_READ_DESC_DONE = 0,
2210 VIRTQUEUE_READ_DESC_MORE = 1,
2211};
2212
2213static int
2214virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
2215 int i, unsigned int max, unsigned int *next)
2216{
2217
2218 if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
2219 return VIRTQUEUE_READ_DESC_DONE;
2220 }
2221
2222
2223 *next = le16toh(desc[i].next);
2224
2225 smp_wmb();
2226
2227 if (*next >= max) {
2228 vu_panic(dev, "Desc next is %u", *next);
2229 return VIRTQUEUE_READ_DESC_ERROR;
2230 }
2231
2232 return VIRTQUEUE_READ_DESC_MORE;
2233}
2234
void
vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                         unsigned int *out_bytes,
                         unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        goto done;
    }

    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
        unsigned int max, desc_len, num_bufs, indirect = 0;
        uint64_t desc_addr, read_len;
        struct vring_desc *desc;
        struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
            goto err;
        }
        desc = vq->vring.desc;

        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
                vu_panic(dev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            desc_addr = le64toh(desc[i].addr);
            desc_len = le32toh(desc[i].len);
            max = desc_len / sizeof(struct vring_desc);
            read_len = desc_len;
            desc = vu_gpa_to_va(dev, &read_len, desc_addr);
            if (unlikely(desc && read_len != desc_len)) {
                /* Failed to use zero copy */
                desc = NULL;
                if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                                  desc_addr,
                                                  desc_len)) {
                    desc = desc_buf;
                }
            }
            if (!desc) {
                vu_panic(dev, "Invalid indirect buffer table");
                goto err;
            }
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
                in_total += le32toh(desc[i].len);
            } else {
                out_total += le32toh(desc[i].len);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
    if (rc < 0) {
        goto err;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

bool
vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                     unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
                             in_bytes, out_bytes);

    return in_bytes <= in_total && out_bytes <= out_total;
}

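/*
 * Example (illustrative sketch, not part of the library): a backend can use
 * vu_queue_avail_bytes() to wait until a request of a known size is fully
 * available before popping it.  The 16-byte request / 1-byte status sizes
 * and the queue index are assumptions made up for the example.
 *
 *     VuVirtq *vq = vu_get_queue(dev, 0);
 *
 *     // Need at least 16 driver->device (out) bytes and 1 device->driver
 *     // (in) byte before starting to process a request.
 *     if (vu_queue_avail_bytes(dev, vq, 1, 16)) {
 *         VuVirtqElement *elem = vu_queue_pop(dev, vq, sizeof(*elem));
 *         ...
 *     }
 */
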
/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
bool
vu_queue_empty(VuDev *dev, VuVirtq *vq)
{
    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return true;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return false;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vu_queue_empty(dev, vq)) {
        return true;
    }

    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

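/*
 * Worked example of the event-idx check above, assuming the standard
 * definition of vring_need_event(), i.e.
 * (uint16_t)(new - event - 1) < (uint16_t)(new - old):
 *
 *   old used_idx = 10, new used_idx = 12
 *   guest's used_event = 10  ->  (12 - 10 - 1) = 1 < (12 - 10) = 2  -> notify
 *   guest's used_event = 13  ->  (12 - 13 - 1) = 65534, not < 2     -> skip
 *
 * In other words, the guest is notified only if the index it asked to be
 * woken at falls in the [old, new) range of newly used entries.
 */
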
static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
{
    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return;
    }

    if (!vring_notify(dev, vq)) {
        DPRINT("skipped notify...\n");
        return;
    }

    if (vq->call_fd < 0 &&
        vu_has_protocol_feature(dev,
                                VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
        vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        VhostUserMsg vmsg = {
            .request = VHOST_USER_SLAVE_VRING_CALL,
            .flags = VHOST_USER_VERSION,
            .size = sizeof(vmsg.payload.state),
            .payload.state = {
                .index = vq - dev->vq,
            },
        };
        bool ack = sync &&
                   vu_has_protocol_feature(dev,
                                           VHOST_USER_PROTOCOL_F_REPLY_ACK);

        if (ack) {
            vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
        }

        vu_message_write(dev, dev->slave_fd, &vmsg);
        if (ack) {
            vu_message_read_default(dev, dev->slave_fd, &vmsg);
        }
        return;
    }

    if (eventfd_write(vq->call_fd, 1) < 0) {
        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
    }
}

void vu_queue_notify(VuDev *dev, VuVirtq *vq)
{
    _vu_queue_notify(dev, vq, false);
}

void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
{
    _vu_queue_notify(dev, vq, true);
}

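/*
 * Example (sketch): a backend normally just calls vu_queue_notify() after
 * completing requests; whether the notification ends up as an eventfd write
 * or an in-band VHOST_USER_SLAVE_VRING_CALL message is decided above and is
 * transparent to the caller.  vu_queue_notify_sync() additionally waits for
 * the front-end's reply when in-band notifications and REPLY_ACK have been
 * negotiated.  "elem" and "reply_len" here are assumed to come from an
 * earlier vu_queue_pop() and from the backend's own processing.
 *
 *     vu_queue_push(dev, vq, elem, reply_len);
 *     vu_queue_notify(dev, vq);
 *     free(elem);
 */
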
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char *)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags = htole16(le16toh(*flags) | mask);
}

static inline void
vring_used_flags_unset_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char *)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags = htole16(le16toh(*flags) & ~mask);
}

static inline void
vring_set_avail_event(VuVirtq *vq, uint16_t val)
{
    uint16_t *avail;

    if (!vq->notification) {
        return;
    }

    /* avail_event is stored after the end of the used ring. */
    avail = (uint16_t *)&vq->vring.used->ring[vq->vring.num];
    *avail = htole16(val);
}

void
vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
{
    vq->notification = enable;
    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller re-checks avail idx. */
        smp_mb();
    }
}

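/*
 * Example (sketch): the usual way to drain a ring without taking a kick per
 * request is to disable notifications while buffers keep arriving, then
 * re-enable them and re-check for buffers added in the meantime.
 * "process_request" is a hypothetical backend function, not part of
 * libvhost-user.
 *
 *     VuVirtqElement *elem;
 *
 *     do {
 *         vu_queue_set_notification(dev, vq, 0);
 *         while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
 *             process_request(dev, vq, elem);
 *         }
 *         vu_queue_set_notification(dev, vq, 1);
 *     } while (!vu_queue_empty(dev, vq));
 */
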
static bool
virtqueue_map_desc(VuDev *dev,
                   unsigned int *p_num_sg, struct iovec *iov,
                   unsigned int max_num_sg, bool is_write,
                   uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        vu_panic(dev, "virtio: zero sized buffers are not allowed");
        return false;
    }

    while (sz) {
        uint64_t len = sz;

        if (num_sg == max_num_sg) {
            vu_panic(dev, "virtio: too many descriptors in indirect table");
            return false;
        }

        iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
        if (iov[num_sg].iov_base == NULL) {
            vu_panic(dev, "virtio: invalid address for buffers");
            return false;
        }
        iov[num_sg].iov_len = len;
        num_sg++;
        sz -= len;
        pa += len;
    }

    *p_num_sg = num_sg;
    return true;
}

static void *
virtqueue_alloc_element(size_t sz,
                        unsigned out_num, unsigned in_num)
{
    VuVirtqElement *elem;
    /* The element header, in_sg[] and out_sg[] are carved out of a single
     * allocation. */
    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VuVirtqElement));
    elem = malloc(out_sg_end);
    if (!elem) {
        DPRINT("%s: failed to malloc virtqueue element\n", __func__);
        return NULL;
    }
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

static void *
vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
{
    struct vring_desc *desc = vq->vring.desc;
    uint64_t desc_addr, read_len;
    unsigned int desc_len;
    unsigned int max = vq->vring.num;
    unsigned int i = idx;
    VuVirtqElement *elem;
    unsigned int out_num = 0, in_num = 0;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    int rc;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = vu_gpa_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                              desc_addr,
                                              desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            vu_panic(dev, "Invalid indirect buffer table");
            return NULL;
        }
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
                                    VIRTQUEUE_MAX_SIZE - out_num, true,
                                    le64toh(desc[i].addr),
                                    le32toh(desc[i].len))) {
                return NULL;
            }
        } else {
            if (in_num) {
                vu_panic(dev, "Incorrect order for descriptors");
                return NULL;
            }
            if (!virtqueue_map_desc(dev, &out_num, iov,
                                    VIRTQUEUE_MAX_SIZE, false,
                                    le64toh(desc[i].addr),
                                    le32toh(desc[i].len))) {
                return NULL;
            }
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            vu_panic(dev, "Looped descriptor");
            return NULL;
        }
        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        vu_panic(dev, "read descriptor error");
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    if (!elem) {
        return NULL;
    }
    elem->index = idx;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    return elem;
}

static int
vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    vq->inflight->desc[desc_idx].counter = vq->counter++;
    vq->inflight->desc[desc_idx].inflight = 1;

    return 0;
}

static int
vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    vq->inflight->last_batch_head = desc_idx;

    return 0;
}

static int
vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    barrier();

    vq->inflight->desc[desc_idx].inflight = 0;

    barrier();

    vq->inflight->used_idx = vq->used_idx;

    return 0;
}

void *
vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
{
    int i;
    unsigned int head;
    VuVirtqElement *elem;

    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return NULL;
    }

    /* Requests left in flight from before a reconnect are handed back first. */
    if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
        i = (--vq->resubmit_num);
        elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);

        if (!vq->resubmit_num) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        return elem;
    }

    if (vu_queue_empty(dev, vq)) {
        return NULL;
    }

    /*
     * Needed after vu_queue_empty(): make sure the avail ring entries are
     * read only after the avail index that published them.
     */
    smp_rmb();

    if (vq->inuse >= vq->vring.num) {
        vu_panic(dev, "Virtqueue size exceeded");
        return NULL;
    }

    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    elem = vu_queue_map_desc(dev, vq, head, sz);

    if (!elem) {
        return NULL;
    }

    vq->inuse++;

    vu_queue_inflight_get(dev, vq, head);

    return elem;
}

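/*
 * Example (sketch): the "sz" argument lets a backend embed the element in a
 * larger, per-request structure; the scatter/gather arrays live in the same
 * allocation, so the whole request is released with a single free().
 * "MyRequest", "process" and "written_len" are hypothetical names made up
 * for the example, not part of libvhost-user.
 *
 *     typedef struct MyRequest {
 *         VuVirtqElement elem;    // must be the first member
 *         int status;
 *     } MyRequest;
 *
 *     MyRequest *req = vu_queue_pop(dev, vq, sizeof(MyRequest));
 *     if (req) {
 *         process(req->elem.out_sg, req->elem.out_num,
 *                 req->elem.in_sg, req->elem.in_num);
 *         vu_queue_push(dev, vq, &req->elem, written_len);
 *         vu_queue_notify(dev, vq);
 *         free(req);
 *     }
 */
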
static void
vu_queue_detach_element(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
                        size_t len)
{
    vq->inuse--;
    /* unmap, when DMA support is added */
}

void
vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
               size_t len)
{
    vq->last_avail_idx--;
    vu_queue_detach_element(dev, vq, elem, len);
}

bool
vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }
    vq->last_avail_idx -= num;
    vq->inuse -= num;
    return true;
}

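/*
 * Example (sketch): if the most recently popped request cannot be processed
 * right now (say, a temporary resource shortage), it can be handed back to
 * the ring instead of being completed, and will be popped again later.
 * "backend_has_resources" is a hypothetical check, not part of the library.
 *
 *     elem = vu_queue_pop(dev, vq, sizeof(*elem));
 *     if (elem && !backend_has_resources()) {
 *         vu_queue_unpop(dev, vq, elem, 0);   // give back the last element
 *         free(elem);
 *         return;
 *     }
 *
 * vu_queue_rewind(dev, vq, num) does the same for the last "num" popped
 * elements at once.
 */
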
static inline
void vring_used_write(VuDev *dev, VuVirtq *vq,
                      struct vring_used_elem *uelem, int i)
{
    struct vring_used *used = vq->vring.used;

    used->ring[i] = *uelem;
    vu_log_write(dev, vq->vring.log_guest_addr +
                 offsetof(struct vring_used, ring[i]),
                 sizeof(used->ring[i]));
}

static void
vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
                  const VuVirtqElement *elem,
                  unsigned int len)
{
    struct vring_desc *desc = vq->vring.desc;
    unsigned int i, max, min, desc_len;
    uint64_t desc_addr, read_len;
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    unsigned num_bufs = 0;

    max = vq->vring.num;
    i = elem->index;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
            return;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = vu_gpa_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                              desc_addr,
                                              desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            vu_panic(dev, "Invalid indirect buffer table");
            return;
        }
        i = 0;
    }

    do {
        if (++num_bufs > max) {
            vu_panic(dev, "Looped descriptor");
            return;
        }

        /* Only device-writable descriptors are dirtied by the device. */
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            min = MIN(le32toh(desc[i].len), len);
            vu_log_write(dev, le64toh(desc[i].addr), min);
            len -= min;
        }

    } while (len > 0 &&
             (virtqueue_read_next_desc(dev, desc, i, max, &i)
              == VIRTQUEUE_READ_DESC_MORE));
}

void
vu_queue_fill(VuDev *dev, VuVirtq *vq,
              const VuVirtqElement *elem,
              unsigned int len, unsigned int idx)
{
    struct vring_used_elem uelem;

    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return;
    }

    vu_log_queue_fill(dev, vq, elem, len);

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = htole32(elem->index);
    uelem.len = htole32(len);
    vring_used_write(dev, vq, &uelem, idx);
}

static inline
void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
{
    vq->vring.used->idx = htole16(val);
    vu_log_write(dev,
                 vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
                 sizeof(vq->vring.used->idx));

    vq->used_idx = val;
}

void
vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(dev, vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

void
vu_queue_push(VuDev *dev, VuVirtq *vq,
              const VuVirtqElement *elem, unsigned int len)
{
    vu_queue_fill(dev, vq, elem, len, 0);
    vu_queue_inflight_pre_put(dev, vq, elem->index);
    vu_queue_flush(dev, vq, 1);
    vu_queue_inflight_post_put(dev, vq, elem->index);
}

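/*
 * Example (sketch): completions can be batched by filling several used
 * entries and flushing the used index once, which also limits how often the
 * guest is notified.  "reqs", "lens" and "n" are hypothetical backend-owned
 * variables made up for the example.
 *
 *     for (i = 0; i < n; i++) {
 *         vu_queue_fill(dev, vq, reqs[i], lens[i], i);
 *     }
 *     vu_queue_flush(dev, vq, n);
 *     vu_queue_notify(dev, vq);
 *
 * vu_queue_push() is the single-element shorthand for the same fill + flush
 * sequence, with the in-flight bookkeeping wrapped around it.
 */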