1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include "qemu/osdep.h"
20#include "exec/target_page.h"
21#include "migration.h"
22#include "qemu-file.h"
23#include "savevm.h"
24#include "postcopy-ram.h"
25#include "ram.h"
26#include "qapi/error.h"
27#include "qemu/notify.h"
28#include "sysemu/sysemu.h"
29#include "sysemu/balloon.h"
30#include "qemu/error-report.h"
31#include "trace.h"
32
33
34
35
/* Maximum number of (start, length) ranges batched into one postcopy
 * RAM-discard command before it is flushed to the migration stream. */
#define MAX_DISCARDS_PER_COMMAND 12

/*
 * Accumulates discard ranges for a single RAMBlock so they can be sent
 * to the destination in batched discard commands.
 */
struct PostcopyDiscardState {
    const char *ramblock_name;   /* not copied; must outlive this state */
    uint16_t cur_entry;          /* next free slot in the lists below */
    /*
     * Start addresses and lengths (both in bytes) of the pending,
     * not-yet-sent discard ranges.
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords;     /* total ranges queued (statistics) */
    unsigned int nsentcmds;      /* total commands sent (statistics) */
};
49
50static NotifierWithReturnList postcopy_notifier_list;
51
/* One-time setup of the postcopy notifier chain. */
void postcopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&postcopy_notifier_list);
}
56
/* Register @nn to be called for postcopy lifecycle events. */
void postcopy_add_notifier(NotifierWithReturn *nn)
{
    notifier_with_return_list_add(&postcopy_notifier_list, nn);
}
61
/* Unregister a notifier previously added with postcopy_add_notifier(). */
void postcopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}
66
67int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp)
68{
69 struct PostcopyNotifyData pnd;
70 pnd.reason = reason;
71 pnd.errp = errp;
72
73 return notifier_with_return_list_notify(&postcopy_notifier_list,
74 &pnd);
75}
76
77
78
79
80
81#if defined(__linux__)
82
83#include <poll.h>
84#include <sys/ioctl.h>
85#include <sys/syscall.h>
86#include <asm/types.h>
87#endif
88
89#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
90#include <sys/eventfd.h>
91#include <linux/userfaultfd.h>
92
93
94
95
96
97
98
99
100
101
102
103
/*
 * Open a transient userfaultfd and query the kernel's supported feature
 * mask via UFFDIO_API (requesting no features of our own).
 *
 * @features: filled with the kernel-advertised feature bits on success
 * Returns: true on success, false on error (already reported)
 */
static bool receive_ufd_features(uint64_t *features)
{
    struct uffdio_api api_struct = {0};
    int ufd;
    bool ret = true;

    /* Probe-only fd; closed again before returning */
    ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
    if (ufd == -1) {
        error_report("%s: syscall __NR_userfaultfd failed: %s", __func__,
                     strerror(errno));
        return false;
    }

    /* Ask API version with no features; kernel replies with what it has */
    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        error_report("%s: UFFDIO_API failed: %s", __func__,
                     strerror(errno));
        ret = false;
        goto release_ufd;
    }

    *features = api_struct.features;

release_ufd:
    close(ufd);
    return ret;
}
134
135
136
137
138
139
140
141
142
143
/*
 * Enable the requested @features on @ufd via UFFDIO_API, and verify the
 * fd supports the REGISTER/UNREGISTER ioctls we rely on.
 *
 * Returns: true on success, false on error (already reported)
 */
static bool request_ufd_features(int ufd, uint64_t features)
{
    struct uffdio_api api_struct = {0};
    uint64_t ioctl_mask;

    api_struct.api = UFFD_API;
    api_struct.features = features;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        error_report("%s failed: UFFDIO_API failed: %s", __func__,
                     strerror(errno));
        return false;
    }

    /* Both register and unregister must be available for postcopy */
    ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
                 (__u64)1 << _UFFDIO_UNREGISTER;
    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
        error_report("Missing userfault features: %" PRIx64,
                     (uint64_t)(~api_struct.ioctls & ioctl_mask));
        return false;
    }

    return true;
}
167
/*
 * Check the userfaultfd features we need are present on this host and
 * apply them to @ufd.  Also verifies hugepage support when the RAM
 * layout uses page sizes other than the host base page size.
 *
 * Returns: true if @ufd is usable for postcopy.
 */
static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
{
    uint64_t asked_features = 0;
    static uint64_t supported_features;

    /*
     * Query the kernel's supported feature set only once; the result is
     * cached in the function-local static across calls.
     */
    if (!supported_features) {
        if (!receive_ufd_features(&supported_features)) {
            error_report("%s failed", __func__);
            return false;
        }
    }

    /*
     * Request the features we want (currently none beyond the default)
     * on the fd that will actually be used.
     */
    if (!request_ufd_features(ufd, asked_features)) {
        error_report("%s failed: features %" PRIu64, __func__,
                     asked_features);
        return false;
    }

    /* RAM uses page sizes other than the host base page size? Then we
     * need the kernel to fault on missing hugetlbfs pages too. */
    if (getpagesize() != ram_pagesize_summary()) {
        bool have_hp = false;

#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
        have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS;
#endif
        if (!have_hp) {
            error_report("Userfault on this host does not support huge pages");
            return false;
        }
    }
    return true;
}
209
210
211
/*
 * qemu_ram_foreach_block() callback: reject RAMBlocks whose length is
 * not a multiple of their own page size (postcopy places whole pages).
 *
 * Returns: 0 if OK, 1 to make the foreach fail.
 */
static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
                                      ram_addr_t offset, ram_addr_t length, void *opaque)
{
    RAMBlock *rb = qemu_ram_block_by_name(block_name);
    size_t pagesize = qemu_ram_pagesize(rb);

    if (length % pagesize) {
        error_report("Postcopy requires RAM blocks to be a page size multiple,"
                     " block %s is 0x" RAM_ADDR_FMT " bytes with a "
                     "page size of 0x%zx", block_name, length, pagesize);
        return 1;
    }
    return 0;
}
226
227
228
229
230
231
232bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
233{
234 long pagesize = getpagesize();
235 int ufd = -1;
236 bool ret = false;
237 void *testarea = NULL;
238 struct uffdio_register reg_struct;
239 struct uffdio_range range_struct;
240 uint64_t feature_mask;
241 Error *local_err = NULL;
242
243 if (qemu_target_page_size() > pagesize) {
244 error_report("Target page size bigger than host page size");
245 goto out;
246 }
247
248 ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
249 if (ufd == -1) {
250 error_report("%s: userfaultfd not available: %s", __func__,
251 strerror(errno));
252 goto out;
253 }
254
255
256 if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) {
257 error_report_err(local_err);
258 goto out;
259 }
260
261
262 if (!ufd_check_and_apply(ufd, mis)) {
263 goto out;
264 }
265
266
267 if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
268 goto out;
269 }
270
271
272
273
274
275 if (munlockall()) {
276 error_report("%s: munlockall: %s", __func__, strerror(errno));
277 return -1;
278 }
279
280
281
282
283
284
285 testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
286 MAP_ANONYMOUS, -1, 0);
287 if (testarea == MAP_FAILED) {
288 error_report("%s: Failed to map test area: %s", __func__,
289 strerror(errno));
290 goto out;
291 }
292 g_assert(((size_t)testarea & (pagesize-1)) == 0);
293
294 reg_struct.range.start = (uintptr_t)testarea;
295 reg_struct.range.len = pagesize;
296 reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
297
298 if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) {
299 error_report("%s userfault register: %s", __func__, strerror(errno));
300 goto out;
301 }
302
303 range_struct.start = (uintptr_t)testarea;
304 range_struct.len = pagesize;
305 if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
306 error_report("%s userfault unregister: %s", __func__, strerror(errno));
307 goto out;
308 }
309
310 feature_mask = (__u64)1 << _UFFDIO_WAKE |
311 (__u64)1 << _UFFDIO_COPY |
312 (__u64)1 << _UFFDIO_ZEROPAGE;
313 if ((reg_struct.ioctls & feature_mask) != feature_mask) {
314 error_report("Missing userfault map features: %" PRIx64,
315 (uint64_t)(~reg_struct.ioctls & feature_mask));
316 goto out;
317 }
318
319
320 ret = true;
321out:
322 if (testarea) {
323 munmap(testarea, pagesize);
324 }
325 if (ufd != -1) {
326 close(ufd);
327 }
328 return ret;
329}
330
331
332
333
334
335
/*
 * qemu_ram_foreach_block() callback used at incoming-init time:
 * discard the entire RAMBlock so pages arriving before postcopy mode
 * start from a clean (zero/hole) state.
 *
 * Returns: 0 on success, -1 on discard failure.
 */
static int init_range(const char *block_name, void *host_addr,
                      ram_addr_t offset, ram_addr_t length, void *opaque)
{
    trace_postcopy_init_range(block_name, host_addr, offset, length);

    /*
     * Discard from offset 0 over the full length of the block;
     * the precopy stream will refill what it sends.
     */
    if (ram_discard_range(block_name, 0, length)) {
        return -1;
    }

    return 0;
}
353
354
355
356
357
/*
 * qemu_ram_foreach_block() callback at the end of postcopy: undo the
 * per-block state — re-enable transparent hugepages and unregister the
 * range from the userfaultfd.
 *
 * @opaque: the MigrationIncomingState (for userfault_fd)
 * Returns: 0 on success, -1 on unregister failure.
 */
static int cleanup_range(const char *block_name, void *host_addr,
                         ram_addr_t offset, ram_addr_t length, void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffdio_range range_struct;
    trace_postcopy_cleanup_range(block_name, host_addr, offset, length);

    /*
     * We turned off hugepage for the precopy stage with postcopy enabled
     * we can turn it back on now.
     */
    qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);

    /*
     * We can also turn off userfault now since we should have all the
     * pages.   It can be done early, just in case we still get a fault
     * while we're cleaning up.
     */
    range_struct.start = (uintptr_t)host_addr;
    range_struct.len = length;

    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
        error_report("%s: userfault unregister %s", __func__, strerror(errno));

        return -1;
    }

    return 0;
}
387
388
389
390
391
392
/*
 * Initialise incoming-side postcopy state: discard all RAMBlocks so the
 * destination starts from empty pages.
 *
 * Returns: 0 on success, -1 if any block could not be discarded.
 */
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    if (qemu_ram_foreach_block(init_range, NULL)) {
        return -1;
    }

    return 0;
}
401
402
403
404
/*
 * At the end of a migration where postcopy was used on the destination:
 * notify listeners, unregister all RAM from userfault, stop and join the
 * fault thread, close the fds, re-enable ballooning, optionally restore
 * mlock, and free the temporary pages.
 *
 * Returns: 0 on success, -1 on error (notifier or unregister failure).
 */
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    trace_postcopy_ram_incoming_cleanup_entry();

    if (mis->have_fault_thread) {
        Error *local_err = NULL;

        if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
            error_report_err(local_err);
            return -1;
        }

        /* Unregister ranges first so no new faults can arrive */
        if (qemu_ram_foreach_block(cleanup_range, mis)) {
            return -1;
        }

        /* Then tell the fault thread to quit and wait for it */
        atomic_set(&mis->fault_thread_quit, 1);
        postcopy_fault_thread_notify(mis);
        trace_postcopy_ram_incoming_cleanup_join();
        qemu_thread_join(&mis->fault_thread);

        trace_postcopy_ram_incoming_cleanup_closeuf();
        close(mis->userfault_fd);
        close(mis->userfault_event_fd);
        mis->have_fault_thread = false;
    }

    /* Ballooning was inhibited while userfault was active */
    qemu_balloon_inhibit(false);

    if (enable_mlock) {
        if (os_mlock() < 0) {
            error_report("mlock: %s", strerror(errno));
            /*
             * It doesn't feel right to fail at this point, we have a valid
             * VM state.
             */
        }
    }

    postcopy_state_set(POSTCOPY_INCOMING_END);

    if (mis->postcopy_tmp_page) {
        munmap(mis->postcopy_tmp_page, mis->largest_page_size);
        mis->postcopy_tmp_page = NULL;
    }
    if (mis->postcopy_tmp_zero_page) {
        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
        mis->postcopy_tmp_zero_page = NULL;
    }
    trace_postcopy_ram_incoming_cleanup_exit();
    return 0;
}
457
458
459
460
/*
 * qemu_ram_foreach_block() callback: disable transparent hugepages on
 * the block so userfault can operate at base-page granularity during
 * postcopy (re-enabled later by cleanup_range()).
 *
 * Returns: always 0.
 */
static int nhp_range(const char *block_name, void *host_addr,
                     ram_addr_t offset, ram_addr_t length, void *opaque)
{
    trace_postcopy_nhp_range(block_name, host_addr, offset, length);

    /*
     * Before we do discards we need to ensure those discards really
     * do delete areas of the page, even if THP thinks a hugepage would
     * be a good idea, so force hugepages off.
     */
    qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);

    return 0;
}
475
476
477
478
479
480
/*
 * Prepare for discards: turn THP off on all RAM and advance the
 * incoming postcopy state machine to DISCARD.
 *
 * Returns: 0 on success, -1 on failure.
 */
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    if (qemu_ram_foreach_block(nhp_range, mis)) {
        return -1;
    }

    postcopy_state_set(POSTCOPY_INCOMING_DISCARD);

    return 0;
}
491
492
493
494
495
496
497
498
499
500
501static int ram_block_enable_notify(const char *block_name, void *host_addr,
502 ram_addr_t offset, ram_addr_t length,
503 void *opaque)
504{
505 MigrationIncomingState *mis = opaque;
506 struct uffdio_register reg_struct;
507
508 reg_struct.range.start = (uintptr_t)host_addr;
509 reg_struct.range.len = length;
510 reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
511
512
513 if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, ®_struct)) {
514 error_report("%s userfault register: %s", __func__, strerror(errno));
515 return -1;
516 }
517 if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
518 error_report("%s userfault: Region doesn't support COPY", __func__);
519 return -1;
520 }
521 if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
522 RAMBlock *rb = qemu_ram_block_by_name(block_name);
523 qemu_ram_set_uf_zeroable(rb);
524 }
525
526 return 0;
527}
528
529int postcopy_wake_shared(struct PostCopyFD *pcfd,
530 uint64_t client_addr,
531 RAMBlock *rb)
532{
533 size_t pagesize = qemu_ram_pagesize(rb);
534 struct uffdio_range range;
535 int ret;
536 trace_postcopy_wake_shared(client_addr, qemu_ram_get_idstr(rb));
537 range.start = client_addr & ~(pagesize - 1);
538 range.len = pagesize;
539 ret = ioctl(pcfd->fd, UFFDIO_WAKE, &range);
540 if (ret) {
541 error_report("%s: Failed to wake: %zx in %s (%s)",
542 __func__, (size_t)client_addr, qemu_ram_get_idstr(rb),
543 strerror(errno));
544 }
545 return ret;
546}
547
548
549
550
551
552
/*
 * Handle a fault on a shared-memory region: if we have already received
 * the page just wake the faulter, otherwise forward a page request to
 * the source.
 *
 * @client_addr: fault address in the client's (e.g. vhost-user) space
 * @rb_offset: offset of the fault within @rb
 * Returns: 0, or the result of postcopy_wake_shared().
 */
int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
                                 uint64_t client_addr, uint64_t rb_offset)
{
    size_t pagesize = qemu_ram_pagesize(rb);
    uint64_t aligned_rbo = rb_offset & ~(pagesize - 1);
    MigrationIncomingState *mis = migration_incoming_get_current();

    trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb),
                                       rb_offset);
    /* Already received? Then the fault is stale; just wake the client */
    if (ramblock_recv_bitmap_test_byte_offset(rb, aligned_rbo)) {
        trace_postcopy_request_shared_page_present(pcfd->idstr,
                                                   qemu_ram_get_idstr(rb), rb_offset);
        return postcopy_wake_shared(pcfd, client_addr, rb);
    }
    if (rb != mis->last_rb) {
        mis->last_rb = rb;
        migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
                                  aligned_rbo, pagesize);
    } else {
        /* Save some space: NULL block name means "same block as before" */
        migrate_send_rp_req_pages(mis, NULL, aligned_rbo, pagesize);
    }
    return 0;
}
577
578
579
580
/*
 * Dedicated thread handling userfault events for the lifetime of the
 * incoming postcopy phase.  Polls:
 *   pfd[0]  - our own userfaultfd (guest RAM faults)
 *   pfd[1]  - an eventfd used to request the thread to quit
 *   pfd[2+] - userfaultfds of shared-memory clients (e.g. vhost-user)
 * Guest faults are forwarded to the source as page requests; shared
 * faults are dispatched to the registered per-fd handler.
 */
static void *postcopy_ram_fault_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffd_msg msg;
    int ret;
    size_t index;
    RAMBlock *rb = NULL;

    trace_postcopy_ram_fault_thread_entry();
    mis->last_rb = NULL; /* last RAMBlock we requested pages from */
    /* Signal postcopy_ram_enable_notify() that we are running */
    qemu_sem_post(&mis->fault_thread_sem);

    struct pollfd *pfd;
    size_t pfd_len = 2 + mis->postcopy_remote_fds->len;

    pfd = g_new0(struct pollfd, pfd_len);

    pfd[0].fd = mis->userfault_fd;
    pfd[0].events = POLLIN;
    pfd[1].fd = mis->userfault_event_fd;
    pfd[1].events = POLLIN;
    trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
    for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
        struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
                                                 struct PostCopyFD, index);
        pfd[2 + index].fd = pcfd->fd;
        pfd[2 + index].events = POLLIN;
        trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
                                                  pcfd->fd);
    }

    while (true) {
        ram_addr_t rb_offset;
        int poll_result;

        /*
         * We're mainly waiting for the kernel to give us a faulting HVA,
         * however we can be told to quit via userfault_event_fd which is
         * an eventfd.  Block forever until something is readable.
         */
        poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
        if (poll_result == -1) {
            error_report("%s: userfault poll: %s", __func__, strerror(errno));
            break;
        }

        if (pfd[1].revents) {
            uint64_t tmp64 = 0;

            /* Consume the eventfd counter */
            if (read(mis->userfault_event_fd, &tmp64, 8) != 8) {
                /* Nothing obviously nicer than posting this error. */
                error_report("%s: read() failed", __func__);
            }

            if (atomic_read(&mis->fault_thread_quit)) {
                trace_postcopy_ram_fault_thread_quit();
                break;
            }
        }

        if (pfd[0].revents) {
            poll_result--;
            ret = read(mis->userfault_fd, &msg, sizeof(msg));
            if (ret != sizeof(msg)) {
                if (errno == EAGAIN) {
                    /*
                     * if a wake up happens on the other thread just after
                     * the poll, there is nothing to read.
                     */
                    continue;
                }
                if (ret < 0) {
                    error_report("%s: Failed to read full userfault "
                                 "message: %s",
                                 __func__, strerror(errno));
                    break;
                } else {
                    error_report("%s: Read %d bytes from userfaultfd "
                                 "expected %zd",
                                 __func__, ret, sizeof(msg));
                    break; /* Lost alignment, don't know what we'd read next */
                }
            }
            if (msg.event != UFFD_EVENT_PAGEFAULT) {
                error_report("%s: Read unexpected event %ud from userfaultfd",
                             __func__, msg.event);
                continue; /* It's not a page fault, shouldn't happen */
            }

            rb = qemu_ram_block_from_host(
                     (void *)(uintptr_t)msg.arg.pagefault.address,
                     true, &rb_offset);
            if (!rb) {
                error_report("postcopy_ram_fault_thread: Fault outside guest: %"
                             PRIx64, (uint64_t)msg.arg.pagefault.address);
                break;
            }

            /* Align down to the block's page size */
            rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
            trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                    qemu_ram_get_idstr(rb),
                                                    rb_offset);

            /*
             * Send the request to the source - we want to request one
             * of our host page sizes (which is >= TPS)
             */
            if (rb != mis->last_rb) {
                mis->last_rb = rb;
                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
                                          rb_offset, qemu_ram_pagesize(rb));
            } else {
                /* Save some space: NULL means "same block as last time" */
                migrate_send_rp_req_pages(mis, NULL,
                                          rb_offset, qemu_ram_pagesize(rb));
            }
        }

        /* Now handle any events from the shared-memory userfaultfds;
         * poll_result tracks how many fds still have unprocessed events. */
        for (index = 2; index < pfd_len && poll_result; index++) {
            if (pfd[index].revents) {
                struct PostCopyFD *pcfd =
                    &g_array_index(mis->postcopy_remote_fds,
                                   struct PostCopyFD, index - 2);

                poll_result--;
                if (pfd[index].revents & POLLERR) {
                    error_report("%s: POLLERR on poll %zd fd=%d",
                                 __func__, index, pcfd->fd);
                    /* Stop polling this fd, but keep serving the others */
                    pfd[index].events = 0;
                    continue;
                }

                ret = read(pcfd->fd, &msg, sizeof(msg));
                if (ret != sizeof(msg)) {
                    if (errno == EAGAIN) {
                        /*
                         * if a wake up happens on the other thread just after
                         * the poll, there is nothing to read.
                         */
                        continue;
                    }
                    if (ret < 0) {
                        error_report("%s: Failed to read full userfault "
                                     "message: %s (shared) revents=%d",
                                     __func__, strerror(errno),
                                     pfd[index].revents);
                        /* Only abandon the remaining shared fds this round */
                        break;
                    } else {
                        error_report("%s: Read %d bytes from userfaultfd "
                                     "expected %zd (shared)",
                                     __func__, ret, sizeof(msg));
                        break;
                    }
                }
                if (msg.event != UFFD_EVENT_PAGEFAULT) {
                    error_report("%s: Read unexpected event %ud "
                                 "from userfaultfd (shared)",
                                 __func__, msg.event);
                    continue;
                }

                /* Hand the fault to the owner's registered handler */
                ret = pcfd->handler(pcfd, &msg);
                if (ret) {
                    error_report("%s: Failed to resolve shared fault on %zd/%s",
                                 __func__, index, pcfd->idstr);
                    /* NOTE(review): failure is only logged, not fatal */
                }
            }
        }
    }
    trace_postcopy_ram_fault_thread_exit();
    g_free(pfd);
    return NULL;
}
760
761int postcopy_ram_enable_notify(MigrationIncomingState *mis)
762{
763
764 mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
765 if (mis->userfault_fd == -1) {
766 error_report("%s: Failed to open userfault fd: %s", __func__,
767 strerror(errno));
768 return -1;
769 }
770
771
772
773
774
775 if (!ufd_check_and_apply(mis->userfault_fd, mis)) {
776 return -1;
777 }
778
779
780 mis->userfault_event_fd = eventfd(0, EFD_CLOEXEC);
781 if (mis->userfault_event_fd == -1) {
782 error_report("%s: Opening userfault_event_fd: %s", __func__,
783 strerror(errno));
784 close(mis->userfault_fd);
785 return -1;
786 }
787
788 qemu_sem_init(&mis->fault_thread_sem, 0);
789 qemu_thread_create(&mis->fault_thread, "postcopy/fault",
790 postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
791 qemu_sem_wait(&mis->fault_thread_sem);
792 qemu_sem_destroy(&mis->fault_thread_sem);
793 mis->have_fault_thread = true;
794
795
796 if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
797 return -1;
798 }
799
800
801
802
803
804 qemu_balloon_inhibit(true);
805
806 trace_postcopy_ram_enable_notify();
807
808 return 0;
809}
810
811static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
812 void *from_addr, uint64_t pagesize, RAMBlock *rb)
813{
814 int ret;
815 if (from_addr) {
816 struct uffdio_copy copy_struct;
817 copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
818 copy_struct.src = (uint64_t)(uintptr_t)from_addr;
819 copy_struct.len = pagesize;
820 copy_struct.mode = 0;
821 ret = ioctl(userfault_fd, UFFDIO_COPY, ©_struct);
822 } else {
823 struct uffdio_zeropage zero_struct;
824 zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
825 zero_struct.range.len = pagesize;
826 zero_struct.mode = 0;
827 ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
828 }
829 if (!ret) {
830 ramblock_recv_bitmap_set_range(rb, host_addr,
831 pagesize / qemu_target_page_size());
832 }
833 return ret;
834}
835
836int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset)
837{
838 int i;
839 MigrationIncomingState *mis = migration_incoming_get_current();
840 GArray *pcrfds = mis->postcopy_remote_fds;
841
842 for (i = 0; i < pcrfds->len; i++) {
843 struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
844 int ret = cur->waker(cur, rb, offset);
845 if (ret) {
846 return ret;
847 }
848 }
849 return 0;
850}
851
852
853
854
855
/*
 * Place a whole host page of data at @host, copied from @from, using
 * UFFDIO_COPY so any thread blocked on the page is atomically woken.
 * Also wakes any shared-memory clients waiting on the page.
 *
 * Returns: 0 on success, -errno on failure.
 */
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                        RAMBlock *rb)
{
    size_t pagesize = qemu_ram_pagesize(rb);

    /*
     * copy the page to the destination; the kernel makes it visible and
     * wakes the faulting thread in one atomic step.
     */
    if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
        int e = errno; /* save before error_report can clobber it */
        error_report("%s: %s copy host: %p from: %p (size: %zd)",
                     __func__, strerror(e), host, from, pagesize);

        return -e;
    }

    trace_postcopy_place_page(host);
    return postcopy_notify_shared_wake(rb,
                                       qemu_ram_block_host_offset(rb, host));
}
878
879
880
881
882
/*
 * Place a zero page at @host.  Uses UFFDIO_ZEROPAGE when the block
 * supports it; otherwise falls back to copying from a lazily-allocated
 * pre-zeroed temporary page via postcopy_place_page().
 *
 * Returns: 0 on success, -errno on failure.
 */
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                             RAMBlock *rb)
{
    size_t pagesize = qemu_ram_pagesize(rb);
    trace_postcopy_place_page_zero(host);

    /* Fast path: the kernel can install a zero page directly
     * (recorded per-block by ram_block_enable_notify()). */
    if (qemu_ram_is_uf_zeroable(rb)) {
        if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) {
            int e = errno; /* save before error_report can clobber it */
            error_report("%s: %s zero host: %p",
                         __func__, strerror(e), host);

            return -e;
        }
        return postcopy_notify_shared_wake(rb,
                                           qemu_ram_block_host_offset(rb,
                                                                      host));
    } else {
        /* Slow path: copy from a zeroed buffer, allocated on first use */
        if (!mis->postcopy_tmp_zero_page) {
            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
                                               PROT_READ | PROT_WRITE,
                                               MAP_PRIVATE | MAP_ANONYMOUS,
                                               -1, 0);
            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
                int e = errno;
                mis->postcopy_tmp_zero_page = NULL;
                error_report("%s: %s mapping large zero page",
                             __func__, strerror(e));
                return -e;
            }
            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
        }
        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
                                   rb);
    }
}
923
924
925
926
927
928
929
930
931
932void *postcopy_get_tmp_page(MigrationIncomingState *mis)
933{
934 if (!mis->postcopy_tmp_page) {
935 mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
936 PROT_READ | PROT_WRITE, MAP_PRIVATE |
937 MAP_ANONYMOUS, -1, 0);
938 if (mis->postcopy_tmp_page == MAP_FAILED) {
939 mis->postcopy_tmp_page = NULL;
940 error_report("%s: %s", __func__, strerror(errno));
941 return NULL;
942 }
943 }
944
945 return mis->postcopy_tmp_page;
946}
947
948#else
949
/*
 * No-Linux / no-userfaultfd build: stub implementations.
 * Only postcopy_ram_supported_by_host() and postcopy_ram_incoming_init()
 * can legitimately be reached (during capability probing); the rest
 * assert because callers must have checked support first.
 */
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
{
    error_report("%s: No OS support", __func__);
    return false;
}

int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
{
    error_report("postcopy_ram_incoming_init: No OS support");
    return -1;
}

int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
                                 uint64_t client_addr, uint64_t rb_offset)
{
    assert(0);
    return -1;
}

int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    assert(0);
    return -1;
}

int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
                        RAMBlock *rb)
{
    assert(0);
    return -1;
}

int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                             RAMBlock *rb)
{
    assert(0);
    return -1;
}

void *postcopy_get_tmp_page(MigrationIncomingState *mis)
{
    assert(0);
    return NULL;
}

int postcopy_wake_shared(struct PostCopyFD *pcfd,
                         uint64_t client_addr,
                         RAMBlock *rb)
{
    assert(0);
    return -1;
}
1014#endif
1015
1016
1017
/*
 * Wake the fault thread out of its poll() by bumping the quit eventfd;
 * the thread then checks mis->fault_thread_quit.  (Built on all hosts;
 * only called when a fault thread exists.)
 */
void postcopy_fault_thread_notify(MigrationIncomingState *mis)
{
    uint64_t tmp64 = 1;

    /*
     * Wakeup the fault_thread.  It's an eventfd that should currently
     * be at 0, we're going to increment it to 1.
     */
    if (write(mis->userfault_event_fd, &tmp64, 8) != 8) {
        /* Not much we can do here, but may as well report it */
        error_report("%s: incrementing failed: %s", __func__,
                     strerror(errno));
    }
}
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
1045 const char *name)
1046{
1047 PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
1048
1049 if (res) {
1050 res->ramblock_name = name;
1051 }
1052
1053 return res;
1054}
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
/*
 * Queue one discard range (in target-page units) on @pds, flushing a
 * full batch to the stream as a discard command when the batch fills.
 *
 * @start/@length: range in target pages, converted to bytes here.
 */
void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
                                 unsigned long start, unsigned long length)
{
    size_t tp_size = qemu_target_page_size();

    /* Convert target-page numbers into byte offsets/lengths */
    pds->start_list[pds->cur_entry] = start * tp_size;
    pds->length_list[pds->cur_entry] = length * tp_size;
    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
    pds->cur_entry++;
    pds->nsentwords++;

    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
        /* Full set, ship it! */
        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
                                              pds->ramblock_name,
                                              pds->cur_entry,
                                              pds->start_list,
                                              pds->length_list);
        pds->nsentcmds++;
        pds->cur_entry = 0;
    }
}
1088
1089
1090
1091
1092
1093
1094
1095
/*
 * Flush any partially-filled batch of discard ranges left on @pds,
 * trace the totals, and free the state.  @pds is invalid afterwards.
 */
void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
{
    /* Anything unsent? */
    if (pds->cur_entry) {
        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
                                              pds->ramblock_name,
                                              pds->cur_entry,
                                              pds->start_list,
                                              pds->length_list);
        pds->nsentcmds++;
    }

    trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
                                       pds->nsentcmds);

    g_free(pds);
}
1113
1114
1115
1116
1117
1118
1119static PostcopyState incoming_postcopy_state;
1120
/* Read the incoming postcopy state with a memory barrier. */
PostcopyState postcopy_state_get(void)
{
    return atomic_mb_read(&incoming_postcopy_state);
}
1125
1126
/* Atomically set the incoming postcopy state; returns the old state. */
PostcopyState postcopy_state_set(PostcopyState new_state)
{
    return atomic_xchg(&incoming_postcopy_state, new_state);
}
1131
1132
1133
1134
/*
 * Register a shared-memory userfaultfd (e.g. from vhost-user) so the
 * fault thread polls it.  *pcfd is copied into the array by value.
 */
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds,
                                                  *pcfd);
}
1142
1143
1144
/*
 * Remove a previously registered shared userfaultfd, matched by its fd.
 * Removes at most one entry; a no-op if the fd is not found.
 */
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
{
    guint i;
    MigrationIncomingState *mis = migration_incoming_get_current();
    GArray *pcrfds = mis->postcopy_remote_fds;

    for (i = 0; i < pcrfds->len; i++) {
        struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
        if (cur->fd == pcfd->fd) {
            mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i);
            return;
        }
    }
}
1159