1
2
3
4
5
6
7
8
9
10
11
12
13#include "qemu/osdep.h"
14#include "qemu-common.h"
15#include "qemu/iov.h"
16#include "qemu/cutils.h"
17#include "qemu/error-report.h"
18#include "qemu/units.h"
19#include "sysemu/numa.h"
20#include "sysemu/sysemu.h"
21#include "sysemu/reset.h"
22#include "hw/virtio/virtio.h"
23#include "hw/virtio/virtio-bus.h"
24#include "hw/virtio/virtio-access.h"
25#include "hw/virtio/virtio-mem.h"
26#include "qapi/error.h"
27#include "qapi/visitor.h"
28#include "exec/ram_addr.h"
29#include "migration/misc.h"
30#include "hw/boards.h"
31#include "hw/qdev-properties.h"
32#include CONFIG_DEVICES
33#include "trace.h"
34
35
36
37
38
/*
 * Smallest value accepted by the block-size property setter in this file
 * (1 MiB), and the lower bound of the default block size.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))

/*
 * Fallback THP size used by virtio_mem_thp_size() when no usable size could
 * be read from sysfs: 2 MiB when built for one of the listed host
 * architectures, otherwise the minimum block size.
 */
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
 defined(__powerpc64__)
#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
#else

#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
#endif
48
49
50
51
52
53
54
55
56
57
58
59
60
61static uint32_t thp_size;
62
63#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
64static uint32_t virtio_mem_thp_size(void)
65{
66 gchar *content = NULL;
67 const char *endptr;
68 uint64_t tmp;
69
70 if (thp_size) {
71 return thp_size;
72 }
73
74
75
76
77
78 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
79 !qemu_strtou64(content, &endptr, 0, &tmp) &&
80 (!endptr || *endptr == '\n')) {
81
82
83
84
85 if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
86 warn_report("Read unsupported THP size: %" PRIx64, tmp);
87 } else {
88 thp_size = tmp;
89 }
90 }
91
92 if (!thp_size) {
93 thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
94 warn_report("Could not detect THP size, falling back to %" PRIx64
95 " MiB.", thp_size / MiB);
96 }
97
98 g_free(content);
99 return thp_size;
100}
101
102static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
103{
104 const uint64_t page_size = qemu_ram_pagesize(rb);
105
106
107 if (page_size == qemu_real_host_page_size) {
108 return MAX(page_size, virtio_mem_thp_size());
109 }
110 return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
111}
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
 * Headroom added on top of the requested size when the usable region size is
 * computed in virtio_mem_resize_usable_region(). It is only defined for the
 * x86 targets; building for any other target stops with an error.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif
132
/*
 * Report whether block state changes must currently be refused: true while
 * an incoming postcopy migration is running or whenever migration is not
 * idle.
 */
static bool virtio_mem_is_busy(void)
{
    if (migration_in_incoming_postcopy()) {
        return true;
    }
    return !migration_is_idle();
}
147
148typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
149 uint64_t offset, uint64_t size);
150
151static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
152 virtio_mem_range_cb cb)
153{
154 unsigned long first_zero_bit, last_zero_bit;
155 uint64_t offset, size;
156 int ret = 0;
157
158 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
159 while (first_zero_bit < vmem->bitmap_size) {
160 offset = first_zero_bit * vmem->block_size;
161 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
162 first_zero_bit + 1) - 1;
163 size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
164
165 ret = cb(vmem, arg, offset, size);
166 if (ret) {
167 break;
168 }
169 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
170 last_zero_bit + 2);
171 }
172 return ret;
173}
174
175
176
177
178
179
180static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
181 uint64_t offset, uint64_t size)
182{
183 uint64_t start = MAX(s->offset_within_region, offset);
184 uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
185 offset + size);
186
187 if (end <= start) {
188 return false;
189 }
190
191 s->offset_within_address_space += start - s->offset_within_region;
192 s->offset_within_region = start;
193 s->size = int128_make64(end - start);
194 return true;
195}
196
197typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
198
199static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
200 MemoryRegionSection *s,
201 void *arg,
202 virtio_mem_section_cb cb)
203{
204 unsigned long first_bit, last_bit;
205 uint64_t offset, size;
206 int ret = 0;
207
208 first_bit = s->offset_within_region / vmem->bitmap_size;
209 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
210 while (first_bit < vmem->bitmap_size) {
211 MemoryRegionSection tmp = *s;
212
213 offset = first_bit * vmem->block_size;
214 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
215 first_bit + 1) - 1;
216 size = (last_bit - first_bit + 1) * vmem->block_size;
217
218 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
219 break;
220 }
221 ret = cb(&tmp, arg);
222 if (ret) {
223 break;
224 }
225 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
226 last_bit + 2);
227 }
228 return ret;
229}
230
231static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
232 MemoryRegionSection *s,
233 void *arg,
234 virtio_mem_section_cb cb)
235{
236 unsigned long first_bit, last_bit;
237 uint64_t offset, size;
238 int ret = 0;
239
240 first_bit = s->offset_within_region / vmem->bitmap_size;
241 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
242 while (first_bit < vmem->bitmap_size) {
243 MemoryRegionSection tmp = *s;
244
245 offset = first_bit * vmem->block_size;
246 last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
247 first_bit + 1) - 1;
248 size = (last_bit - first_bit + 1) * vmem->block_size;
249
250 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
251 break;
252 }
253 ret = cb(&tmp, arg);
254 if (ret) {
255 break;
256 }
257 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
258 last_bit + 2);
259 }
260 return ret;
261}
262
263static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
264{
265 RamDiscardListener *rdl = arg;
266
267 return rdl->notify_populate(rdl, s);
268}
269
270static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
271{
272 RamDiscardListener *rdl = arg;
273
274 rdl->notify_discard(rdl, s);
275 return 0;
276}
277
278static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
279 uint64_t size)
280{
281 RamDiscardListener *rdl;
282
283 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
284 MemoryRegionSection tmp = *rdl->section;
285
286 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
287 continue;
288 }
289 rdl->notify_discard(rdl, &tmp);
290 }
291}
292
293static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
294 uint64_t size)
295{
296 RamDiscardListener *rdl, *rdl2;
297 int ret = 0;
298
299 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
300 MemoryRegionSection tmp = *rdl->section;
301
302 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
303 continue;
304 }
305 ret = rdl->notify_populate(rdl, &tmp);
306 if (ret) {
307 break;
308 }
309 }
310
311 if (ret) {
312
313 QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
314 MemoryRegionSection tmp = *rdl->section;
315
316 if (rdl2 == rdl) {
317 break;
318 }
319 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
320 continue;
321 }
322 rdl2->notify_discard(rdl2, &tmp);
323 }
324 }
325 return ret;
326}
327
328static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
329{
330 RamDiscardListener *rdl;
331
332 if (!vmem->size) {
333 return;
334 }
335
336 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
337 if (rdl->double_discard_supported) {
338 rdl->notify_discard(rdl, rdl->section);
339 } else {
340 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
341 virtio_mem_notify_discard_cb);
342 }
343 }
344}
345
346static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
347 uint64_t size, bool plugged)
348{
349 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
350 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
351 unsigned long found_bit;
352
353
354 if (plugged) {
355 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
356 } else {
357 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
358 }
359 return found_bit > last_bit;
360}
361
362static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
363 uint64_t size, bool plugged)
364{
365 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
366 const unsigned long nbits = size / vmem->block_size;
367
368 if (plugged) {
369 bitmap_set(vmem->bitmap, bit, nbits);
370 } else {
371 bitmap_clear(vmem->bitmap, bit, nbits);
372 }
373}
374
375static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
376 struct virtio_mem_resp *resp)
377{
378 VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
379 VirtQueue *vq = vmem->vq;
380
381 trace_virtio_mem_send_response(le16_to_cpu(resp->type));
382 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
383
384 virtqueue_push(vq, elem, sizeof(*resp));
385 virtio_notify(vdev, vq);
386}
387
388static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
389 VirtQueueElement *elem,
390 uint16_t type)
391{
392 struct virtio_mem_resp resp = {
393 .type = cpu_to_le16(type),
394 };
395
396 virtio_mem_send_response(vmem, elem, &resp);
397}
398
399static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
400 uint64_t size)
401{
402 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
403 return false;
404 }
405 if (gpa + size < gpa || !size) {
406 return false;
407 }
408 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
409 return false;
410 }
411 if (gpa + size > vmem->addr + vmem->usable_region_size) {
412 return false;
413 }
414 return true;
415}
416
417static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
418 uint64_t size, bool plug)
419{
420 const uint64_t offset = start_gpa - vmem->addr;
421 RAMBlock *rb = vmem->memdev->mr.ram_block;
422
423 if (virtio_mem_is_busy()) {
424 return -EBUSY;
425 }
426
427 if (!plug) {
428 if (ram_block_discard_range(rb, offset, size)) {
429 return -EBUSY;
430 }
431 virtio_mem_notify_unplug(vmem, offset, size);
432 } else if (virtio_mem_notify_plug(vmem, offset, size)) {
433
434 ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
435 return -EBUSY;
436 }
437 virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
438 return 0;
439}
440
441static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
442 uint16_t nb_blocks, bool plug)
443{
444 const uint64_t size = nb_blocks * vmem->block_size;
445 int ret;
446
447 if (!virtio_mem_valid_range(vmem, gpa, size)) {
448 return VIRTIO_MEM_RESP_ERROR;
449 }
450
451 if (plug && (vmem->size + size > vmem->requested_size)) {
452 return VIRTIO_MEM_RESP_NACK;
453 }
454
455
456 if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
457 return VIRTIO_MEM_RESP_ERROR;
458 }
459
460 ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
461 if (ret) {
462 return VIRTIO_MEM_RESP_BUSY;
463 }
464 if (plug) {
465 vmem->size += size;
466 } else {
467 vmem->size -= size;
468 }
469 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
470 return VIRTIO_MEM_RESP_ACK;
471}
472
473static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
474 struct virtio_mem_req *req)
475{
476 const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
477 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
478 uint16_t type;
479
480 trace_virtio_mem_plug_request(gpa, nb_blocks);
481 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
482 virtio_mem_send_response_simple(vmem, elem, type);
483}
484
485static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
486 struct virtio_mem_req *req)
487{
488 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
489 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
490 uint16_t type;
491
492 trace_virtio_mem_unplug_request(gpa, nb_blocks);
493 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
494 virtio_mem_send_response_simple(vmem, elem, type);
495}
496
497static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
498 uint64_t requested_size,
499 bool can_shrink)
500{
501 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
502 requested_size + VIRTIO_MEM_USABLE_EXTENT);
503
504
505 newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
506
507 if (!requested_size) {
508 newsize = 0;
509 }
510
511 if (newsize < vmem->usable_region_size && !can_shrink) {
512 return;
513 }
514
515 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
516 vmem->usable_region_size = newsize;
517}
518
519static int virtio_mem_unplug_all(VirtIOMEM *vmem)
520{
521 RAMBlock *rb = vmem->memdev->mr.ram_block;
522
523 if (virtio_mem_is_busy()) {
524 return -EBUSY;
525 }
526
527 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
528 return -EBUSY;
529 }
530 virtio_mem_notify_unplug_all(vmem);
531
532 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
533 if (vmem->size) {
534 vmem->size = 0;
535 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
536 }
537 trace_virtio_mem_unplugged_all();
538 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
539 return 0;
540}
541
542static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
543 VirtQueueElement *elem)
544{
545 trace_virtio_mem_unplug_all_request();
546 if (virtio_mem_unplug_all(vmem)) {
547 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
548 } else {
549 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
550 }
551}
552
553static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
554 struct virtio_mem_req *req)
555{
556 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
557 const uint64_t gpa = le64_to_cpu(req->u.state.addr);
558 const uint64_t size = nb_blocks * vmem->block_size;
559 struct virtio_mem_resp resp = {
560 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
561 };
562
563 trace_virtio_mem_state_request(gpa, nb_blocks);
564 if (!virtio_mem_valid_range(vmem, gpa, size)) {
565 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
566 return;
567 }
568
569 if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
570 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
571 } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
572 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
573 } else {
574 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
575 }
576 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
577 virtio_mem_send_response(vmem, elem, &resp);
578}
579
580static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
581{
582 const int len = sizeof(struct virtio_mem_req);
583 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
584 VirtQueueElement *elem;
585 struct virtio_mem_req req;
586 uint16_t type;
587
588 while (true) {
589 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
590 if (!elem) {
591 return;
592 }
593
594 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
595 virtio_error(vdev, "virtio-mem protocol violation: invalid request"
596 " size: %d", len);
597 virtqueue_detach_element(vq, elem, 0);
598 g_free(elem);
599 return;
600 }
601
602 if (iov_size(elem->in_sg, elem->in_num) <
603 sizeof(struct virtio_mem_resp)) {
604 virtio_error(vdev, "virtio-mem protocol violation: not enough space"
605 " for response: %zu",
606 iov_size(elem->in_sg, elem->in_num));
607 virtqueue_detach_element(vq, elem, 0);
608 g_free(elem);
609 return;
610 }
611
612 type = le16_to_cpu(req.type);
613 switch (type) {
614 case VIRTIO_MEM_REQ_PLUG:
615 virtio_mem_plug_request(vmem, elem, &req);
616 break;
617 case VIRTIO_MEM_REQ_UNPLUG:
618 virtio_mem_unplug_request(vmem, elem, &req);
619 break;
620 case VIRTIO_MEM_REQ_UNPLUG_ALL:
621 virtio_mem_unplug_all_request(vmem, elem);
622 break;
623 case VIRTIO_MEM_REQ_STATE:
624 virtio_mem_state_request(vmem, elem, &req);
625 break;
626 default:
627 virtio_error(vdev, "virtio-mem protocol violation: unknown request"
628 " type: %d", type);
629 virtqueue_detach_element(vq, elem, 0);
630 g_free(elem);
631 return;
632 }
633
634 g_free(elem);
635 }
636}
637
638static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
639{
640 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
641 struct virtio_mem_config *config = (void *) config_data;
642
643 config->block_size = cpu_to_le64(vmem->block_size);
644 config->node_id = cpu_to_le16(vmem->node);
645 config->requested_size = cpu_to_le64(vmem->requested_size);
646 config->plugged_size = cpu_to_le64(vmem->size);
647 config->addr = cpu_to_le64(vmem->addr);
648 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
649 config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
650}
651
652static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
653 Error **errp)
654{
655 MachineState *ms = MACHINE(qdev_get_machine());
656
657 if (ms->numa_state) {
658#if defined(CONFIG_ACPI)
659 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
660#endif
661 }
662 return features;
663}
664
665static void virtio_mem_system_reset(void *opaque)
666{
667 VirtIOMEM *vmem = VIRTIO_MEM(opaque);
668
669
670
671
672
673
674 virtio_mem_unplug_all(vmem);
675}
676
677static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
678{
679 MachineState *ms = MACHINE(qdev_get_machine());
680 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
681 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
682 VirtIOMEM *vmem = VIRTIO_MEM(dev);
683 uint64_t page_size;
684 RAMBlock *rb;
685 int ret;
686
687 if (!vmem->memdev) {
688 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
689 return;
690 } else if (host_memory_backend_is_mapped(vmem->memdev)) {
691 error_setg(errp, "'%s' property specifies a busy memdev: %s",
692 VIRTIO_MEM_MEMDEV_PROP,
693 object_get_canonical_path_component(OBJECT(vmem->memdev)));
694 return;
695 } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
696 memory_region_is_rom(&vmem->memdev->mr) ||
697 !vmem->memdev->mr.ram_block) {
698 error_setg(errp, "'%s' property specifies an unsupported memdev",
699 VIRTIO_MEM_MEMDEV_PROP);
700 return;
701 }
702
703 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
704 (!nb_numa_nodes && vmem->node)) {
705 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
706 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
707 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
708 return;
709 }
710
711 if (enable_mlock) {
712 error_setg(errp, "Incompatible with mlock");
713 return;
714 }
715
716 rb = vmem->memdev->mr.ram_block;
717 page_size = qemu_ram_pagesize(rb);
718
719
720
721
722
723
724 if (!vmem->block_size) {
725 vmem->block_size = virtio_mem_default_block_size(rb);
726 }
727
728 if (vmem->block_size < page_size) {
729 error_setg(errp, "'%s' property has to be at least the page size (0x%"
730 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
731 return;
732 } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
733 warn_report("'%s' property is smaller than the default block size (%"
734 PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
735 virtio_mem_default_block_size(rb) / MiB);
736 } else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
737 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
738 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
739 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
740 return;
741 } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
742 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
743 ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
744 vmem->block_size);
745 return;
746 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
747 vmem->block_size)) {
748 error_setg(errp, "'%s' property memdev size has to be multiples of"
749 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
750 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
751 return;
752 }
753
754 if (ram_block_coordinated_discard_require(true)) {
755 error_setg(errp, "Discarding RAM is disabled");
756 return;
757 }
758
759 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
760 if (ret) {
761 error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
762 ram_block_coordinated_discard_require(false);
763 return;
764 }
765
766 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
767
768 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
769 vmem->block_size;
770 vmem->bitmap = bitmap_new(vmem->bitmap_size);
771
772 virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
773 sizeof(struct virtio_mem_config));
774 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
775
776 host_memory_backend_set_mapped(vmem->memdev, true);
777 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
778 qemu_register_reset(virtio_mem_system_reset, vmem);
779
780
781
782
783
784 memory_region_set_ram_discard_manager(&vmem->memdev->mr,
785 RAM_DISCARD_MANAGER(vmem));
786}
787
/*
 * Unrealize the device: undo the setup done by virtio_mem_device_realize().
 * The steps mirror realize in reverse order.
 */
static void virtio_mem_device_unrealize(DeviceState *dev)
{
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VirtIOMEM *vmem = VIRTIO_MEM(dev);

 /*
  * Stop acting as RAM discard manager first; this was the last thing
  * realize set up.
  */
 memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
 qemu_unregister_reset(virtio_mem_system_reset, vmem);
 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
 host_memory_backend_set_mapped(vmem->memdev, false);
 /* Queue 0 is the only queue added in realize. */
 virtio_del_queue(vdev, 0);
 virtio_cleanup(vdev);
 g_free(vmem->bitmap);
 /* Release the coordinated-discard requirement taken in realize. */
 ram_block_coordinated_discard_require(false);
}
806
807static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
808 uint64_t offset, uint64_t size)
809{
810 RAMBlock *rb = vmem->memdev->mr.ram_block;
811
812 return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
813}
814
815static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
816{
817
818 return virtio_mem_for_each_unplugged_range(vmem, NULL,
819 virtio_mem_discard_range_cb);
820}
821
822static int virtio_mem_post_load(void *opaque, int version_id)
823{
824 VirtIOMEM *vmem = VIRTIO_MEM(opaque);
825 RamDiscardListener *rdl;
826 int ret;
827
828
829
830
831
832 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
833 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
834 virtio_mem_notify_populate_cb);
835 if (ret) {
836 return ret;
837 }
838 }
839
840 if (migration_in_incoming_postcopy()) {
841 return 0;
842 }
843
844 return virtio_mem_restore_unplugged(vmem);
845}
846
/*
 * Temporary migration state used for sanity checks: a snapshot of device
 * properties that the pre_save handler fills in from the device and the
 * post_load handler compares against the device's own values.
 */
typedef struct VirtIOMEMMigSanityChecks {
 VirtIOMEM *parent; /* device the handlers read from / compare against */
 uint64_t addr; /* snapshot of the device's addr */
 uint64_t region_size; /* snapshot of the memory backend's region size */
 uint64_t block_size; /* snapshot of the device's block_size */
 uint32_t node; /* snapshot of the device's node */
} VirtIOMEMMigSanityChecks;
854
855static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
856{
857 VirtIOMEMMigSanityChecks *tmp = opaque;
858 VirtIOMEM *vmem = tmp->parent;
859
860 tmp->addr = vmem->addr;
861 tmp->region_size = memory_region_size(&vmem->memdev->mr);
862 tmp->block_size = vmem->block_size;
863 tmp->node = vmem->node;
864 return 0;
865}
866
867static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
868{
869 VirtIOMEMMigSanityChecks *tmp = opaque;
870 VirtIOMEM *vmem = tmp->parent;
871 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
872
873 if (tmp->addr != vmem->addr) {
874 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
875 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
876 return -EINVAL;
877 }
878
879
880
881
882 if (tmp->region_size != new_region_size) {
883 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
884 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
885 new_region_size);
886 return -EINVAL;
887 }
888 if (tmp->block_size != vmem->block_size) {
889 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
890 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
891 vmem->block_size);
892 return -EINVAL;
893 }
894 if (tmp->node != vmem->node) {
895 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
896 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
897 return -EINVAL;
898 }
899 return 0;
900}
901
/*
 * Migration description of the sanity-check snapshot: the four fields of
 * VirtIOMEMMigSanityChecks, filled by pre_save and verified by post_load.
 */
static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
 .name = "virtio-mem-device/sanity-checks",
 .pre_save = virtio_mem_mig_sanity_checks_pre_save,
 .post_load = virtio_mem_mig_sanity_checks_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
 VMSTATE_END_OF_LIST(),
 },
};
914
/*
 * Migration description of the device-specific state: the sanity-check
 * snapshot (listed first), the usable region size, the plugged size, the
 * requested size and the block bitmap. virtio_mem_post_load() runs once the
 * fields have been loaded.
 */
static const VMStateDescription vmstate_virtio_mem_device = {
 .name = "virtio-mem-device",
 .minimum_version_id = 1,
 .version_id = 1,
 .priority = MIG_PRI_VIRTIO_MEM,
 .post_load = virtio_mem_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
 vmstate_virtio_mem_sanity_checks),
 VMSTATE_UINT64(usable_region_size, VirtIOMEM),
 VMSTATE_UINT64(size, VirtIOMEM),
 VMSTATE_UINT64(requested_size, VirtIOMEM),
 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
 VMSTATE_END_OF_LIST()
 },
};
931
/*
 * Top-level migration description installed as the DeviceClass vmsd in
 * class_init; its only field is the generic VMSTATE_VIRTIO_DEVICE entry.
 */
static const VMStateDescription vmstate_virtio_mem = {
 .name = "virtio-mem",
 .minimum_version_id = 1,
 .version_id = 1,
 .fields = (VMStateField[]) {
 VMSTATE_VIRTIO_DEVICE,
 VMSTATE_END_OF_LIST()
 },
};
941
942static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
943 VirtioMEMDeviceInfo *vi)
944{
945 vi->memaddr = vmem->addr;
946 vi->node = vmem->node;
947 vi->requested_size = vmem->requested_size;
948 vi->size = vmem->size;
949 vi->max_size = memory_region_size(&vmem->memdev->mr);
950 vi->block_size = vmem->block_size;
951 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
952}
953
954static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
955{
956 if (!vmem->memdev) {
957 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
958 return NULL;
959 }
960
961 return &vmem->memdev->mr;
962}
963
964static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
965 Notifier *notifier)
966{
967 notifier_list_add(&vmem->size_change_notifiers, notifier);
968}
969
970static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
971 Notifier *notifier)
972{
973 notifier_remove(notifier);
974}
975
976static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
977 void *opaque, Error **errp)
978{
979 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
980 uint64_t value = vmem->size;
981
982 visit_type_size(v, name, &value, errp);
983}
984
985static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
986 const char *name, void *opaque,
987 Error **errp)
988{
989 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
990 uint64_t value = vmem->requested_size;
991
992 visit_type_size(v, name, &value, errp);
993}
994
995static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
996 const char *name, void *opaque,
997 Error **errp)
998{
999 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1000 Error *err = NULL;
1001 uint64_t value;
1002
1003 visit_type_size(v, name, &value, &err);
1004 if (err) {
1005 error_propagate(errp, err);
1006 return;
1007 }
1008
1009
1010
1011
1012
1013 if (DEVICE(obj)->realized) {
1014 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
1015 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
1016 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
1017 vmem->block_size);
1018 return;
1019 } else if (value > memory_region_size(&vmem->memdev->mr)) {
1020 error_setg(errp, "'%s' cannot exceed the memory backend size"
1021 "(0x%" PRIx64 ")", name,
1022 memory_region_size(&vmem->memdev->mr));
1023 return;
1024 }
1025
1026 if (value != vmem->requested_size) {
1027 virtio_mem_resize_usable_region(vmem, value, false);
1028 vmem->requested_size = value;
1029 }
1030
1031
1032
1033
1034 virtio_notify_config(VIRTIO_DEVICE(vmem));
1035 } else {
1036 vmem->requested_size = value;
1037 }
1038}
1039
1040static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
1041 void *opaque, Error **errp)
1042{
1043 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1044 uint64_t value = vmem->block_size;
1045
1046
1047
1048
1049
1050 if (!value) {
1051 if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
1052 value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
1053 } else {
1054 value = virtio_mem_thp_size();
1055 }
1056 }
1057
1058 visit_type_size(v, name, &value, errp);
1059}
1060
1061static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
1062 void *opaque, Error **errp)
1063{
1064 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1065 Error *err = NULL;
1066 uint64_t value;
1067
1068 if (DEVICE(obj)->realized) {
1069 error_setg(errp, "'%s' cannot be changed", name);
1070 return;
1071 }
1072
1073 visit_type_size(v, name, &value, &err);
1074 if (err) {
1075 error_propagate(errp, err);
1076 return;
1077 }
1078
1079 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
1080 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
1081 VIRTIO_MEM_MIN_BLOCK_SIZE);
1082 return;
1083 } else if (!is_power_of_2(value)) {
1084 error_setg(errp, "'%s' property has to be a power of two", name);
1085 return;
1086 }
1087 vmem->block_size = value;
1088}
1089
1090static void virtio_mem_instance_init(Object *obj)
1091{
1092 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1093
1094 notifier_list_init(&vmem->size_change_notifiers);
1095 QLIST_INIT(&vmem->rdl_list);
1096
1097 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
1098 NULL, NULL, NULL);
1099 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
1100 virtio_mem_get_requested_size,
1101 virtio_mem_set_requested_size, NULL, NULL);
1102 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
1103 virtio_mem_get_block_size, virtio_mem_set_block_size,
1104 NULL, NULL);
1105}
1106
/*
 * Statically defined device properties: the address and node (both
 * defaulting to 0) and the link to the host memory backend.
 */
static Property virtio_mem_properties[] = {
 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
 TYPE_MEMORY_BACKEND, HostMemoryBackend *),
 DEFINE_PROP_END_OF_LIST(),
};
1114
1115static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
1116 const MemoryRegion *mr)
1117{
1118 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1119
1120 g_assert(mr == &vmem->memdev->mr);
1121 return vmem->block_size;
1122}
1123
1124static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
1125 const MemoryRegionSection *s)
1126{
1127 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1128 uint64_t start_gpa = vmem->addr + s->offset_within_region;
1129 uint64_t end_gpa = start_gpa + int128_get64(s->size);
1130
1131 g_assert(s->mr == &vmem->memdev->mr);
1132
1133 start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
1134 end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
1135
1136 if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
1137 return false;
1138 }
1139
1140 return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
1141}
1142
/*
 * Context handed to the replay callbacks: the caller's replay function
 * (stored untyped and cast back to ReplayRamPopulate or ReplayRamDiscard by
 * the respective callback) and the caller's opaque pointer.
 */
struct VirtIOMEMReplayData {
 void *fn;
 void *opaque;
};
1147
1148static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
1149{
1150 struct VirtIOMEMReplayData *data = arg;
1151
1152 return ((ReplayRamPopulate)data->fn)(s, data->opaque);
1153}
1154
1155static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
1156 MemoryRegionSection *s,
1157 ReplayRamPopulate replay_fn,
1158 void *opaque)
1159{
1160 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1161 struct VirtIOMEMReplayData data = {
1162 .fn = replay_fn,
1163 .opaque = opaque,
1164 };
1165
1166 g_assert(s->mr == &vmem->memdev->mr);
1167 return virtio_mem_for_each_plugged_section(vmem, s, &data,
1168 virtio_mem_rdm_replay_populated_cb);
1169}
1170
1171static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
1172 void *arg)
1173{
1174 struct VirtIOMEMReplayData *data = arg;
1175
1176 ((ReplayRamDiscard)data->fn)(s, data->opaque);
1177 return 0;
1178}
1179
1180static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
1181 MemoryRegionSection *s,
1182 ReplayRamDiscard replay_fn,
1183 void *opaque)
1184{
1185 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1186 struct VirtIOMEMReplayData data = {
1187 .fn = replay_fn,
1188 .opaque = opaque,
1189 };
1190
1191 g_assert(s->mr == &vmem->memdev->mr);
1192 virtio_mem_for_each_unplugged_section(vmem, s, &data,
1193 virtio_mem_rdm_replay_discarded_cb);
1194}
1195
1196static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
1197 RamDiscardListener *rdl,
1198 MemoryRegionSection *s)
1199{
1200 VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1201 int ret;
1202
1203 g_assert(s->mr == &vmem->memdev->mr);
1204 rdl->section = memory_region_section_new_copy(s);
1205
1206 QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
1207 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1208 virtio_mem_notify_populate_cb);
1209 if (ret) {
1210 error_report("%s: Replaying plugged ranges failed: %s", __func__,
1211 strerror(-ret));
1212 }
1213}
1214
1215static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
1216 RamDiscardListener *rdl)
1217{
1218 VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1219
1220 g_assert(rdl->section->mr == &vmem->memdev->mr);
1221 if (vmem->size) {
1222 if (rdl->double_discard_supported) {
1223 rdl->notify_discard(rdl, rdl->section);
1224 } else {
1225 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1226 virtio_mem_notify_discard_cb);
1227 }
1228 }
1229
1230 memory_region_section_free_copy(rdl->section);
1231 rdl->section = NULL;
1232 QLIST_REMOVE(rdl, next);
1233}
1234
1235static void virtio_mem_class_init(ObjectClass *klass, void *data)
1236{
1237 DeviceClass *dc = DEVICE_CLASS(klass);
1238 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1239 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
1240 RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
1241
1242 device_class_set_props(dc, virtio_mem_properties);
1243 dc->vmsd = &vmstate_virtio_mem;
1244
1245 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1246 vdc->realize = virtio_mem_device_realize;
1247 vdc->unrealize = virtio_mem_device_unrealize;
1248 vdc->get_config = virtio_mem_get_config;
1249 vdc->get_features = virtio_mem_get_features;
1250 vdc->vmsd = &vmstate_virtio_mem_device;
1251
1252 vmc->fill_device_info = virtio_mem_fill_device_info;
1253 vmc->get_memory_region = virtio_mem_get_memory_region;
1254 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
1255 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
1256
1257 rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
1258 rdmc->is_populated = virtio_mem_rdm_is_populated;
1259 rdmc->replay_populated = virtio_mem_rdm_replay_populated;
1260 rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
1261 rdmc->register_listener = virtio_mem_rdm_register_listener;
1262 rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
1263}
1264
/*
 * QOM type description of the virtio-mem device: a virtio device that also
 * implements the RAM discard manager interface.
 */
static const TypeInfo virtio_mem_info = {
 .name = TYPE_VIRTIO_MEM,
 .parent = TYPE_VIRTIO_DEVICE,
 .instance_size = sizeof(VirtIOMEM),
 .instance_init = virtio_mem_instance_init,
 .class_init = virtio_mem_class_init,
 .class_size = sizeof(VirtIOMEMClass),
 .interfaces = (InterfaceInfo[]) {
 { TYPE_RAM_DISCARD_MANAGER },
 { }
 },
};
1277
/* Register the virtio-mem type described by virtio_mem_info. */
static void virtio_register_types(void)
{
 type_register_static(&virtio_mem_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(virtio_register_types)
1284