1
2
3
4
5
6
7
8
9
10
11
12
13#include <sys/ioctl.h>
14#include <sys/eventfd.h>
15#include "vhost.h"
16#include "hw/hw.h"
17
18#include "pci.h"
19#include <linux/vhost.h>
20
21static void vhost_dev_sync_region(struct vhost_dev *dev,
22 uint64_t mfirst, uint64_t mlast,
23 uint64_t rfirst, uint64_t rlast)
24{
25 uint64_t start = MAX(mfirst, rfirst);
26 uint64_t end = MIN(mlast, rlast);
27 vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
28 vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
29 uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
30
31 assert(end / VHOST_LOG_CHUNK < dev->log_size);
32 assert(start / VHOST_LOG_CHUNK < dev->log_size);
33 if (end < start) {
34 return;
35 }
36 for (;from < to; ++from) {
37 vhost_log_chunk_t log;
38 int bit;
39
40
41 if (!*from) {
42 continue;
43 }
44
45
46
47
48 log = __sync_fetch_and_and(from, 0);
49 while ((bit = sizeof(log) > sizeof(int) ?
50 ffsll(log) : ffs(log))) {
51 bit -= 1;
52 cpu_physical_memory_set_dirty(addr + bit * VHOST_LOG_PAGE);
53 log &= ~(0x1ull << bit);
54 }
55 addr += VHOST_LOG_CHUNK;
56 }
57}
58
59static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
60 target_phys_addr_t start_addr,
61 target_phys_addr_t end_addr)
62{
63 struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
64 int i;
65 if (!dev->log_enabled || !dev->started) {
66 return 0;
67 }
68 for (i = 0; i < dev->mem->nregions; ++i) {
69 struct vhost_memory_region *reg = dev->mem->regions + i;
70 vhost_dev_sync_region(dev, start_addr, end_addr,
71 reg->guest_phys_addr,
72 range_get_last(reg->guest_phys_addr,
73 reg->memory_size));
74 }
75 for (i = 0; i < dev->nvqs; ++i) {
76 struct vhost_virtqueue *vq = dev->vqs + i;
77 vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
78 range_get_last(vq->used_phys, vq->used_size));
79 }
80 return 0;
81}
82
83
84
85static void vhost_dev_unassign_memory(struct vhost_dev *dev,
86 uint64_t start_addr,
87 uint64_t size)
88{
89 int from, to, n = dev->mem->nregions;
90
91 int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;
92
93 for (from = 0, to = 0; from < n; ++from, ++to) {
94 struct vhost_memory_region *reg = dev->mem->regions + to;
95 uint64_t reglast;
96 uint64_t memlast;
97 uint64_t change;
98
99
100 if (to != from) {
101 memcpy(reg, dev->mem->regions + from, sizeof *reg);
102 }
103
104
105 if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
106 start_addr, size)) {
107 continue;
108 }
109
110
111
112
113 assert(!split);
114
115 reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
116 memlast = range_get_last(start_addr, size);
117
118
119 if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
120 --dev->mem->nregions;
121 --to;
122 assert(to >= 0);
123 ++overlap_middle;
124 continue;
125 }
126
127
128 if (memlast >= reglast) {
129 reg->memory_size = start_addr - reg->guest_phys_addr;
130 assert(reg->memory_size);
131 assert(!overlap_end);
132 ++overlap_end;
133 continue;
134 }
135
136
137 if (start_addr <= reg->guest_phys_addr) {
138 change = memlast + 1 - reg->guest_phys_addr;
139 reg->memory_size -= change;
140 reg->guest_phys_addr += change;
141 reg->userspace_addr += change;
142 assert(reg->memory_size);
143 assert(!overlap_start);
144 ++overlap_start;
145 continue;
146 }
147
148
149
150
151 assert(!overlap_start);
152 assert(!overlap_end);
153 assert(!overlap_middle);
154
155 memcpy(dev->mem->regions + n, reg, sizeof *reg);
156 reg->memory_size = start_addr - reg->guest_phys_addr;
157 assert(reg->memory_size);
158 change = memlast + 1 - reg->guest_phys_addr;
159 reg = dev->mem->regions + n;
160 reg->memory_size -= change;
161 assert(reg->memory_size);
162 reg->guest_phys_addr += change;
163 reg->userspace_addr += change;
164
165 assert(dev->mem->nregions == n);
166 ++dev->mem->nregions;
167 ++split;
168 }
169}
170
171
172static void vhost_dev_assign_memory(struct vhost_dev *dev,
173 uint64_t start_addr,
174 uint64_t size,
175 uint64_t uaddr)
176{
177 int from, to;
178 struct vhost_memory_region *merged = NULL;
179 for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
180 struct vhost_memory_region *reg = dev->mem->regions + to;
181 uint64_t prlast, urlast;
182 uint64_t pmlast, umlast;
183 uint64_t s, e, u;
184
185
186 if (to != from) {
187 memcpy(reg, dev->mem->regions + from, sizeof *reg);
188 }
189 prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
190 pmlast = range_get_last(start_addr, size);
191 urlast = range_get_last(reg->userspace_addr, reg->memory_size);
192 umlast = range_get_last(uaddr, size);
193
194
195 assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
196
197 if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
198 (pmlast + 1 != reg->guest_phys_addr ||
199 umlast + 1 != reg->userspace_addr)) {
200 continue;
201 }
202
203 if (merged) {
204 --to;
205 assert(to >= 0);
206 } else {
207 merged = reg;
208 }
209 u = MIN(uaddr, reg->userspace_addr);
210 s = MIN(start_addr, reg->guest_phys_addr);
211 e = MAX(pmlast, prlast);
212 uaddr = merged->userspace_addr = u;
213 start_addr = merged->guest_phys_addr = s;
214 size = merged->memory_size = e - s + 1;
215 assert(merged->memory_size);
216 }
217
218 if (!merged) {
219 struct vhost_memory_region *reg = dev->mem->regions + to;
220 memset(reg, 0, sizeof *reg);
221 reg->memory_size = size;
222 assert(reg->memory_size);
223 reg->guest_phys_addr = start_addr;
224 reg->userspace_addr = uaddr;
225 ++to;
226 }
227 assert(to <= dev->mem->nregions + 1);
228 dev->mem->nregions = to;
229}
230
231static uint64_t vhost_get_log_size(struct vhost_dev *dev)
232{
233 uint64_t log_size = 0;
234 int i;
235 for (i = 0; i < dev->mem->nregions; ++i) {
236 struct vhost_memory_region *reg = dev->mem->regions + i;
237 uint64_t last = range_get_last(reg->guest_phys_addr,
238 reg->memory_size);
239 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
240 }
241 for (i = 0; i < dev->nvqs; ++i) {
242 struct vhost_virtqueue *vq = dev->vqs + i;
243 uint64_t last = vq->used_phys + vq->used_size - 1;
244 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
245 }
246 return log_size;
247}
248
249static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
250{
251 vhost_log_chunk_t *log;
252 uint64_t log_base;
253 int r;
254 if (size) {
255 log = qemu_mallocz(size * sizeof *log);
256 } else {
257 log = NULL;
258 }
259 log_base = (uint64_t)(unsigned long)log;
260 r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
261 assert(r >= 0);
262 vhost_client_sync_dirty_bitmap(&dev->client, 0,
263 (target_phys_addr_t)~0x0ull);
264 if (dev->log) {
265 qemu_free(dev->log);
266 }
267 dev->log = log;
268 dev->log_size = size;
269}
270
271static int vhost_verify_ring_mappings(struct vhost_dev *dev,
272 uint64_t start_addr,
273 uint64_t size)
274{
275 int i;
276 for (i = 0; i < dev->nvqs; ++i) {
277 struct vhost_virtqueue *vq = dev->vqs + i;
278 target_phys_addr_t l;
279 void *p;
280
281 if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
282 continue;
283 }
284 l = vq->ring_size;
285 p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
286 if (!p || l != vq->ring_size) {
287 fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
288 return -ENOMEM;
289 }
290 if (p != vq->ring) {
291 fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
292 return -EBUSY;
293 }
294 cpu_physical_memory_unmap(p, l, 0, 0);
295 }
296 return 0;
297}
298
299static void vhost_client_set_memory(CPUPhysMemoryClient *client,
300 target_phys_addr_t start_addr,
301 ram_addr_t size,
302 ram_addr_t phys_offset)
303{
304 struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
305 ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
306 int s = offsetof(struct vhost_memory, regions) +
307 (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
308 uint64_t log_size;
309 int r;
310 dev->mem = qemu_realloc(dev->mem, s);
311
312 assert(size);
313
314 vhost_dev_unassign_memory(dev, start_addr, size);
315 if (flags == IO_MEM_RAM) {
316
317 vhost_dev_assign_memory(dev, start_addr, size,
318 (uintptr_t)qemu_get_ram_ptr(phys_offset));
319 } else {
320
321 vhost_dev_unassign_memory(dev, start_addr, size);
322 }
323
324 if (!dev->started) {
325 return;
326 }
327
328 if (dev->started) {
329 r = vhost_verify_ring_mappings(dev, start_addr, size);
330 assert(r >= 0);
331 }
332
333 if (!dev->log_enabled) {
334 r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
335 assert(r >= 0);
336 return;
337 }
338 log_size = vhost_get_log_size(dev);
339
340
341#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
342
343 if (dev->log_size < log_size) {
344 vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
345 }
346 r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
347 assert(r >= 0);
348
349 if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
350 vhost_dev_log_resize(dev, log_size);
351 }
352}
353
354static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
355 struct vhost_virtqueue *vq,
356 unsigned idx, bool enable_log)
357{
358 struct vhost_vring_addr addr = {
359 .index = idx,
360 .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
361 .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
362 .used_user_addr = (uint64_t)(unsigned long)vq->used,
363 .log_guest_addr = vq->used_phys,
364 .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
365 };
366 int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
367 if (r < 0) {
368 return -errno;
369 }
370 return 0;
371}
372
373static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
374{
375 uint64_t features = dev->acked_features;
376 int r;
377 if (enable_log) {
378 features |= 0x1 << VHOST_F_LOG_ALL;
379 }
380 r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
381 return r < 0 ? -errno : 0;
382}
383
384static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
385{
386 int r, t, i;
387 r = vhost_dev_set_features(dev, enable_log);
388 if (r < 0) {
389 goto err_features;
390 }
391 for (i = 0; i < dev->nvqs; ++i) {
392 r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
393 enable_log);
394 if (r < 0) {
395 goto err_vq;
396 }
397 }
398 return 0;
399err_vq:
400 for (; i >= 0; --i) {
401 t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
402 dev->log_enabled);
403 assert(t >= 0);
404 }
405 t = vhost_dev_set_features(dev, dev->log_enabled);
406 assert(t >= 0);
407err_features:
408 return r;
409}
410
411static int vhost_client_migration_log(CPUPhysMemoryClient *client,
412 int enable)
413{
414 struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
415 int r;
416 if (!!enable == dev->log_enabled) {
417 return 0;
418 }
419 if (!dev->started) {
420 dev->log_enabled = enable;
421 return 0;
422 }
423 if (!enable) {
424 r = vhost_dev_set_log(dev, false);
425 if (r < 0) {
426 return r;
427 }
428 if (dev->log) {
429 qemu_free(dev->log);
430 }
431 dev->log = NULL;
432 dev->log_size = 0;
433 } else {
434 vhost_dev_log_resize(dev, vhost_get_log_size(dev));
435 r = vhost_dev_set_log(dev, true);
436 if (r < 0) {
437 return r;
438 }
439 }
440 dev->log_enabled = enable;
441 return 0;
442}
443
444static int vhost_virtqueue_init(struct vhost_dev *dev,
445 struct VirtIODevice *vdev,
446 struct vhost_virtqueue *vq,
447 unsigned idx)
448{
449 target_phys_addr_t s, l, a;
450 int r;
451 struct vhost_vring_file file = {
452 .index = idx,
453 };
454 struct vhost_vring_state state = {
455 .index = idx,
456 };
457 struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
458
459 if (!vdev->binding->set_host_notifier) {
460 fprintf(stderr, "binding does not support host notifiers\n");
461 return -ENOSYS;
462 }
463
464 vq->num = state.num = virtio_queue_get_num(vdev, idx);
465 r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
466 if (r) {
467 return -errno;
468 }
469
470 state.num = virtio_queue_get_last_avail_idx(vdev, idx);
471 r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
472 if (r) {
473 return -errno;
474 }
475
476 s = l = virtio_queue_get_desc_size(vdev, idx);
477 a = virtio_queue_get_desc_addr(vdev, idx);
478 vq->desc = cpu_physical_memory_map(a, &l, 0);
479 if (!vq->desc || l != s) {
480 r = -ENOMEM;
481 goto fail_alloc_desc;
482 }
483 s = l = virtio_queue_get_avail_size(vdev, idx);
484 a = virtio_queue_get_avail_addr(vdev, idx);
485 vq->avail = cpu_physical_memory_map(a, &l, 0);
486 if (!vq->avail || l != s) {
487 r = -ENOMEM;
488 goto fail_alloc_avail;
489 }
490 vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
491 vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
492 vq->used = cpu_physical_memory_map(a, &l, 1);
493 if (!vq->used || l != s) {
494 r = -ENOMEM;
495 goto fail_alloc_used;
496 }
497
498 vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
499 vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
500 vq->ring = cpu_physical_memory_map(a, &l, 1);
501 if (!vq->ring || l != s) {
502 r = -ENOMEM;
503 goto fail_alloc_ring;
504 }
505
506 r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
507 if (r < 0) {
508 r = -errno;
509 goto fail_alloc;
510 }
511 r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
512 if (r < 0) {
513 fprintf(stderr, "Error binding host notifier: %d\n", -r);
514 goto fail_host_notifier;
515 }
516
517 file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
518 r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
519 if (r) {
520 r = -errno;
521 goto fail_kick;
522 }
523
524 file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
525 r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
526 if (r) {
527 r = -errno;
528 goto fail_call;
529 }
530
531 return 0;
532
533fail_call:
534fail_kick:
535 vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
536fail_host_notifier:
537fail_alloc:
538 cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
539 0, 0);
540fail_alloc_ring:
541 cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
542 0, 0);
543fail_alloc_used:
544 cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
545 0, 0);
546fail_alloc_avail:
547 cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
548 0, 0);
549fail_alloc_desc:
550 return r;
551}
552
553static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
554 struct VirtIODevice *vdev,
555 struct vhost_virtqueue *vq,
556 unsigned idx)
557{
558 struct vhost_vring_state state = {
559 .index = idx,
560 };
561 int r;
562 r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
563 if (r < 0) {
564 fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
565 fflush(stderr);
566 }
567 assert (r >= 0);
568 r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
569 if (r < 0) {
570 fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
571 fflush(stderr);
572 }
573 virtio_queue_set_last_avail_idx(vdev, idx, state.num);
574 assert (r >= 0);
575 cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
576 0, virtio_queue_get_ring_size(vdev, idx));
577 cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
578 1, virtio_queue_get_used_size(vdev, idx));
579 cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
580 0, virtio_queue_get_avail_size(vdev, idx));
581 cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
582 0, virtio_queue_get_desc_size(vdev, idx));
583}
584
585int vhost_dev_init(struct vhost_dev *hdev, int devfd)
586{
587 uint64_t features;
588 int r;
589 if (devfd >= 0) {
590 hdev->control = devfd;
591 } else {
592 hdev->control = open("/dev/vhost-net", O_RDWR);
593 if (hdev->control < 0) {
594 return -errno;
595 }
596 }
597 r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
598 if (r < 0) {
599 goto fail;
600 }
601
602 r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
603 if (r < 0) {
604 goto fail;
605 }
606 hdev->features = features;
607
608 hdev->client.set_memory = vhost_client_set_memory;
609 hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
610 hdev->client.migration_log = vhost_client_migration_log;
611 hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
612 hdev->log = NULL;
613 hdev->log_size = 0;
614 hdev->log_enabled = false;
615 hdev->started = false;
616 cpu_register_phys_memory_client(&hdev->client);
617 return 0;
618fail:
619 r = -errno;
620 close(hdev->control);
621 return r;
622}
623
624void vhost_dev_cleanup(struct vhost_dev *hdev)
625{
626 cpu_unregister_phys_memory_client(&hdev->client);
627 qemu_free(hdev->mem);
628 close(hdev->control);
629}
630
631int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
632{
633 int i, r;
634 if (!vdev->binding->set_guest_notifiers) {
635 fprintf(stderr, "binding does not support guest notifiers\n");
636 r = -ENOSYS;
637 goto fail;
638 }
639
640 r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
641 if (r < 0) {
642 fprintf(stderr, "Error binding guest notifier: %d\n", -r);
643 goto fail_notifiers;
644 }
645
646 r = vhost_dev_set_features(hdev, hdev->log_enabled);
647 if (r < 0) {
648 goto fail_features;
649 }
650 r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
651 if (r < 0) {
652 r = -errno;
653 goto fail_mem;
654 }
655 for (i = 0; i < hdev->nvqs; ++i) {
656 r = vhost_virtqueue_init(hdev,
657 vdev,
658 hdev->vqs + i,
659 i);
660 if (r < 0) {
661 goto fail_vq;
662 }
663 }
664
665 if (hdev->log_enabled) {
666 hdev->log_size = vhost_get_log_size(hdev);
667 hdev->log = hdev->log_size ?
668 qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
669 r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
670 (uint64_t)(unsigned long)hdev->log);
671 if (r < 0) {
672 r = -errno;
673 goto fail_log;
674 }
675 }
676
677 hdev->started = true;
678
679 return 0;
680fail_log:
681fail_vq:
682 while (--i >= 0) {
683 vhost_virtqueue_cleanup(hdev,
684 vdev,
685 hdev->vqs + i,
686 i);
687 }
688fail_mem:
689fail_features:
690 vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
691fail_notifiers:
692fail:
693 return r;
694}
695
696void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
697{
698 int i, r;
699
700 for (i = 0; i < hdev->nvqs; ++i) {
701 vhost_virtqueue_cleanup(hdev,
702 vdev,
703 hdev->vqs + i,
704 i);
705 }
706 vhost_client_sync_dirty_bitmap(&hdev->client, 0,
707 (target_phys_addr_t)~0x0ull);
708 r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
709 if (r < 0) {
710 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
711 fflush(stderr);
712 }
713 assert (r >= 0);
714
715 hdev->started = false;
716 qemu_free(hdev->log);
717 hdev->log_size = 0;
718}
719