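/*
 * RAM migration: saving and loading of guest memory pages.  Covers
 * dirty-bitmap synchronization, zero-page detection, XBZRLE delta
 * compression, multi-threaded (de)compression and multifd channels.
 * (The original license header of this file was lost; this summary is
 * reconstructed from the code below.)
 */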
#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"
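
/***********************************************************/
/* ram save/restore */

/*
 * The RAM_SAVE_FLAG_* values below travel in the low bits of the page
 * offset on the wire (see save_page_header()).  RAM_SAVE_FLAG_ZERO
 * marks an all-zero page, and RAM_SAVE_FLAG_CONTINUE means the page
 * belongs to the same RAMBlock as the previous one, so the block id
 * string is not re-sent.  For example, a normal page of a block that
 * is already being sent goes out as:
 *
 *   qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE);
 *   qemu_put_buffer(f, page, TARGET_PAGE_SIZE);
 *
 * (see save_page_header() and save_normal_page() below).
 */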
#define RAM_SAVE_FLAG_FULL     0x01
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* XBZRLE page cache plus the scratch buffers used by the encoder */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}
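
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This is called from the QMP layer, possibly while a migration is in
 * progress, so the cache swap below is protected by XBZRLE_cache_lock().
 *
 * Returns 0 on success or -1 on error
 *
 * @new_size: new cache size in bytes
 * @errp: set *errp if the check failed, with reason
 */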
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

static bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (ramblock_is_ignored(block)) {} else

#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (!qemu_ram_is_migratable(block)) {} else

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)
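
/*
 * Send the received bitmap of a RAMBlock to the source.
 *
 * Format on the wire: bitmap size in bytes (8 bytes, rounded up to a
 * multiple of 8), the little-endian bitmap itself, then the
 * RAMBLOCK_RECV_BITMAP_ENDING marker so the source can sanity-check
 * the stream.
 *
 * Returns the number of bytes sent (>0), or a negative QEMUFile error.
 */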
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see the ROUND_UP
     * below).  So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap, so that the
     * stream is valid even when source and destination VMs do not use
     * the same endianness.
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines.  We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark the end of the bitmap, so the destination can detect a
     * corrupted middle part.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request from the destination, with the RAMBlock
 * it belongs to and the range (offset, len) still to be sent.
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
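
/*
 * State of RAM for migration.  There is one RAMState for the whole
 * outgoing migration; it is reset by ram_state_reset() at the start
 * of each round.
 */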
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;

    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at the start of the period */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since the start of the period */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses up to the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times no free compression thread was available */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* number of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of the period */
    uint64_t target_page_count_prev;
    /* total handled target pages since the start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

void precopy_enable_free_page_optimization(void)
{
    if (!ram_state) {
        return;
    }

    ram_state->fpo_enabled = true;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}
MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/*
 * comp_done_cond is used to wake up the migration thread when one of
 * the compression threads has finished its compression.
 * comp_done_lock is used together with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use 'file' as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /*
         * comp_param[i].file is just used as a dummy buffer to save data,
         * so set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

/* Multifd flags */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)

typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;

typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    uint32_t pages_used;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    uint64_t unused[4];     /* Reserved for future use */
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets received through this channel */
    uint64_t num_packets;
    /* pages received through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d is greater than "
                   "number of channels %d", msg.id, migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    int i;

    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(p->pages->allocated);
    packet->pages_used = cpu_to_be32(p->pages->used);
    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
    /*
     * If we received a packet that is 100 times bigger than expected
     * just stop migration.  It is a magic number.
     */
    if (packet->pages_alloc > pages_max * 100) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected a maximum size of %d",
                   packet->pages_alloc, pages_max * 100);
        return -1;
    }
    /*
     * We received a packet that is bigger than expected but inside
     * reasonable limits (see previous comment).  Just reallocate.
     */
    if (packet->pages_alloc > p->pages->allocated) {
        multifd_pages_clear(p->pages);
        p->pages = multifd_pages_init(packet->pages_alloc);
    }

    p->pages->used = be32_to_cpu(packet->pages_used);
    if (p->pages->used > packet->pages_alloc) {
        error_setg(errp, "multifd: received packet "
                   "with %d pages and expected maximum pages are %d",
                   p->pages->used, packet->pages_alloc);
        return -1;
    }

    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used) {
        /* make sure that ramblock is 0 terminated */
        packet->ramblock[255] = 0;
        block = qemu_ram_block_by_name(packet->ramblock);
        if (!block) {
            error_setg(errp, "multifd: unknown ram block %s",
                       packet->ramblock);
            return -1;
        }
    }

    for (i = 0; i < p->pages->used; i++) {
        ram_addr_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->used_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* array of pages queued for sending */
    MultiFDPages_t *pages;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;
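
/*
 * How multifd_send_state->pages and channel->pages are used:
 *
 * Each channel owns a MultiFDPages_t, and there is one main one in
 * multifd_send_state.  Whenever a set of pages is ready to be sent,
 * the main struct is swapped with the chosen channel's.  Two reasons
 * for that:
 *    - it avoids repeated allocations during migration
 *    - it makes it clear what has to be freed at the end of migration
 *
 * This way we always know who owns each "pages" struct: either the
 * migration thread or one channel thread, so no locking on the struct
 * itself is needed.  Swapping is safe because the migration thread
 * holds the channel mutex while changing it, and the channel must
 * have finished with its own copy, otherwise pending_job could not be
 * zero.
 */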
static int multifd_send_pages(RAMState *rs)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL;
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            error_report("%s: channel %d has already quit!", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    qemu_file_update_transfer(rs->f, transferred);
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);

    return 1;
}

static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return 1;
        }
    }

    if (multifd_send_pages(rs) < 0) {
        return -1;
    }

    if (pages->block != block) {
        return multifd_queue_page(rs, block, offset);
    }

    return 1;
}

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    trace_multifd_send_terminate_threads(err != NULL);

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}

static void multifd_send_sync_main(RAMState *rs)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        if (multifd_send_pages(rs) < 0) {
            error_report("%s: multifd_send_pages fail", __func__);
            return;
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
            error_report("%s: channel %d has already quit", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return;
        }

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_file_update_transfer(rs->f, p->packet_len);
        ram_counters.multifd_bytes += p->packet_len;
        ram_counters.transferred += p->packet_len;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&p->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret = 0;
    uint32_t flags = 0;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        ret = -1;
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            flags = p->flags;

            p->next_packet_size = used * qemu_target_page_size();
            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags,
                               p->next_packet_size);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            if (used) {
                ret = qio_channel_writev_all(p->c, p->pages->iov,
                                             used, &local_err);
                if (ret != 0) {
                    break;
                }
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&p->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        trace_multifd_send_error(p->id);
        multifd_send_terminate_threads(local_err);
    }

    /*
     * We exit on error, but we cannot just leave: wake up anyone
     * waiting on the sync semaphore or on channels_ready.
     */
    if (ret != 0) {
        qemu_sem_post(&p->sem_sync);
        qemu_sem_post(&multifd_send_state->channels_ready);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    trace_multifd_new_send_channel_async(p->id);
    if (qio_task_propagate_error(task, &local_err)) {
        migrate_set_error(migrate_get_current(), local_err);
        multifd_save_cleanup();
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);
    }
}

int multifd_save_setup(void)
{
    int thread_count;
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
        p->packet->version = cpu_to_be32(MULTIFD_VERSION);
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    trace_multifd_recv_terminate_threads(err != NULL);

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        /*
         * We could arrive here for two reasons:
         *  - normal quit, i.e. everything went fine, just finished
         *  - error quit: we close the channels so the channel threads
         *    finish the qio_channel_read_all_eof()
         */
        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            p->quit = true;
            /*
             * multifd_recv_thread may be blocked on sem_sync in the
             * MULTIFD_FLAG_SYNC handling, so wake it up; that is
             * harmless in the cleanup phase.
             */
            qemu_sem_post(&p->sem_sync);
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

static void multifd_recv_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_recv_state->sem_sync);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (multifd_recv_state->packet_num < p->packet_num) {
            multifd_recv_state->packet_num = p->packet_num;
        }
        qemu_mutex_unlock(&p->mutex);
        trace_multifd_recv_sync_main_signal(p->id);
        qemu_sem_post(&p->sem_sync);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
}

static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_recv_thread_start(p->id);
    rcu_register_thread();

    while (true) {
        uint32_t used;
        uint32_t flags;

        if (p->quit) {
            break;
        }

        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                       p->packet_len, &local_err);
        if (ret == 0) {   /* EOF */
            break;
        }
        if (ret == -1) {   /* Error */
            break;
        }

        qemu_mutex_lock(&p->mutex);
        ret = multifd_recv_unfill_packet(p, &local_err);
        if (ret) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }

        used = p->pages->used;
        flags = p->flags;
        trace_multifd_recv(p->id, p->packet_num, used, flags,
                           p->next_packet_size);
        p->num_packets++;
        p->num_pages += used;
        qemu_mutex_unlock(&p->mutex);

        if (used) {
            ret = qio_channel_readv_all(p->c, p->pages->iov,
                                        used, &local_err);
            if (ret != 0) {
                break;
            }
        }

        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_recv_state->sem_sync);
            qemu_sem_wait(&p->sem_sync);
        }
    }

    if (local_err) {
        multifd_recv_terminate_threads(local_err);
    }
    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

int multifd_load_setup(void)
{
    int thread_count;
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    atomic_set(&multifd_recv_state->count, 0);
    qemu_sem_init(&multifd_recv_state->sem_sync, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdrecv_%d", i);
    }
    return 0;
}

bool multifd_recv_all_channels_created(void)
{
    int thread_count = migrate_multifd_channels();

    if (!migrate_use_multifd()) {
        return true;
    }

    return thread_count == atomic_read(&multifd_recv_state->count);
}

/*
 * Receive one more multifd channel in preparation for migration.
 * - Returns true when all channels have been correctly received;
 * - Returns false (without setting @errp) after correctly receiving
 *   the current channel when more are still expected;
 * - Returns false and sets @errp when failing to receive the current
 *   channel.
 */
bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
{
    MultiFDRecvParams *p;
    Error *local_err = NULL;
    int id;

    id = multifd_recv_initial_packet(ioc, &local_err);
    if (id < 0) {
        multifd_recv_terminate_threads(local_err);
        error_propagate_prepend(errp, local_err,
                                "failed to receive packet"
                                " via multifd channel %d: ",
                                atomic_read(&multifd_recv_state->count));
        return false;
    }
    trace_multifd_recv_new_channel(id);

    p = &multifd_recv_state->params[id];
    if (p->c != NULL) {
        error_setg(&local_err, "multifd: received id '%d' already setup",
                   id);
        multifd_recv_terminate_threads(local_err);
        error_propagate(errp, local_err);
        return false;
    }
    p->c = ioc;
    object_ref(OBJECT(ioc));
    /* initial packet */
    p->num_packets = 1;

    p->running = true;
    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                       QEMU_THREAD_JOINABLE);
    atomic_inc(&multifd_recv_state->count);
    return atomic_read(&multifd_recv_state->count) ==
           migrate_multifd_channels();
}
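
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */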
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
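
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */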
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    int pct_max = s->parameters.max_cpu_throttle;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_increment,
                             pct_max));
    }
}
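
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */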
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /*
     * We don't care if this fails to allocate a new cache page as
     * long as it updated an old one.
     */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1
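
/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */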
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}

/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Returns the page number of the next dirty page, or the size of the
 * block if no dirty page is found
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long next;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    /*
     * When the free page optimization is enabled, we need to check the
     * bitmap to send the non-free pages rather than all the pages in
     * the bulk stage.
     */
    if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
        next = start + 1;
    } else {
        next = find_next_bit(bitmap, size, start);
    }

    return next;
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    qemu_mutex_lock(&rs->bitmap_mutex);

    /*
     * Clear the backend's dirty bitmap lazily, per chunk.  This _must_
     * be done before we send any page of the chunk, so that further
     * writes to those pages are caught the next time we sync the
     * dirty log.  As long as we are going to send any page in the
     * chunk we clear the remote dirty bitmap for the whole chunk.
     */
    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
        uint8_t shift = rb->clear_bmap_shift;
        hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
        hwaddr start = (page << TARGET_PAGE_BITS) & (-size);

        /*
         * CLEAR_BITMAP_SHIFT_MIN should always guarantee this, and it
         * also keeps things simple: the start address of the chunk is
         * then 64 pages aligned, so the chunk bitmap is aligned to an
         * unsigned long.
         */
        assert(shift >= 6);
        trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
        memory_region_clear_dirty_bitmap(rb->mr, start, size);
    }

    ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    return ret;
}

/* Called within an RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length,
                                              &rs->num_dirty_pages_period);
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}

uint64_t ram_get_total_transferred_pages(void)
{
    return ram_counters.normal + ram_counters.duplicate +
           compression_counters.pages + xbzrle_counters.pages;
}

static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;
    uint64_t bytes_xfer_now;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        bytes_xfer_now = ram_counters.transferred;

        /*
         * During block migration the auto-converge logic incorrectly
         * detects that ram migration makes no progress. Avoid this by
         * disabling the throttling logic during the bulk phase of
         * block migration.
         */
        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
            /*
             * The following detection logic can be refined later. For
             * now: check to see if the dirtied bytes are 50% more than
             * the approximate amount of bytes that just got
             * transferred since the last time we were in this routine.
             * If that happens twice, start or increase throttling.
             */
            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (++rs->dirty_rate_high_cnt >= 2)) {
                trace_migration_throttle();
                rs->dirty_rate_high_cnt = 0;
                mig_throttle_guest_down();
            }
        }

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = bytes_xfer_now;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}

static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration,
     * so we don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}
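
/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is
 * not a zero page
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */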
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(file, 0);
        len += 1;
    }
    return len;
}

/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    int len = save_zero_page_to_file(rs, rs->f, block, offset);

    if (len) {
        ram_counters.duplicate++;
        ram_counters.transferred += len;
        return 1;
    }
    return -1;
}

static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
}

/*
 * @pages: the number of pages written by the control path,
 *        < 0 - error
 *        > 0 - number of pages written
 *
 * Return true if the page has been saved, otherwise false is returned.
 */
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}

/*
 * directly send the page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @buf: the page to be sent
 * @async: send the page asynchronously
 */
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                            uint8_t *buf, bool async)
{
    ram_counters.transferred += save_page_header(rs, rs->f, block,
                                                 offset | RAM_SAVE_FLAG_PAGE);
    if (async) {
        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
                              migrate_release_ram() &&
                              migration_in_postcopy());
    } else {
        qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
    }
    ram_counters.transferred += TARGET_PAGE_SIZE;
    ram_counters.normal++;
    return 1;
}

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
        migrate_use_xbzrle()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /*
             * Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                 ram_addr_t offset)
{
    if (multifd_queue_page(rs, block, offset) < 0) {
        return -1;
    }
    ram_counters.normal++;

    return 1;
}

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
    bool zero_page = false;
    int ret;

    if (save_zero_page_to_file(rs, f, block, offset)) {
        zero_page = true;
        goto exit;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * Copy the page into a scratch buffer first, so that concurrent
     * guest writes cannot change the data while zlib compresses it.
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
        return false;
    }

exit:
    ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    return zero_page;
}

static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_counters.transferred += bytes_xmit;

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}

static bool save_page_use_compression(RAMState *rs);

static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * It's safe to fetch zero_page without holding
             * comp_done_lock, as there is no further request submitted
             * to the thread, i.e. the thread should be waiting for a
             * request at this point.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * Wait for a free thread if the user specifies
     * 'compress-wait-thread', otherwise we will post the page out in
     * the main thread as a normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
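
/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns true if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */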
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /*
             * If memory migration starts over, we will meet a dirtied
             * page which may still be in the compression threads' ring,
             * so we should flush the compressed data to make sure the
             * new page is not overwritten by the old one in the
             * destination.
             *
             * Also if xbzrle is on, stop using the data compression at
             * this point. In theory, xbzrle can do better than
             * compression.
             */
            flush_compressed_data(rs);

            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* We've found something */
        *again = true;
        return true;
    }
}

/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
        return NULL;
    }

    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
            migration_consume_urgent_request();
        }
    }
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return block;
}

/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns true if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing
         * else will dirty it, so we must make sure it doesn't get sent
         * again even if this queue request was received after the
         * background search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                                                page);
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we
         * have to kill the bulk stage, since the bulk stage assumes
         * in migration_bitmap_find_dirty that every page is dirty,
         * and that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued
         * page, since the guest is likely to want other pages near to
         * the page it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;

        /*
         * This unqueued page would break the "one round" check, even
         * if it is really rare.
         */
        pss->complete_round = false;
    }

    return !!block;
}

/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  In case there is any page left, we drop it.
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /*
     * This queue generally should be empty - but in the case of a
     * failed migration might have some droppings in.
     */
    RCU_READ_LOCK_GUARD();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
}
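
/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @rbname: Name of the RAMBLock of the request. NULL means the
 *          same that last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */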
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    RCU_READ_LOCK_GUARD();

    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start + len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct RAMSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct RAMSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    migration_make_urgent_request();
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;

err:
    return -1;
}

static bool save_page_use_compression(RAMState *rs)
{
    if (!migrate_use_compression()) {
        return false;
    }

    /*
     * If xbzrle is on, stop using the data compression after the first
     * round of migration even if compression is enabled. In theory,
     * xbzrle can do better than compression.
     */
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return true;
    }

    return false;
}

/*
 * try to compress the page before posting it out, return true if the
 * page has been properly handled by compression, otherwise needs other
 * paths to handle it
 */
static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    if (!save_page_use_compression(rs)) {
        return false;
    }

    /*
     * When starting the process of a new block, the first page of the
     * block should be sent out before other pages in the same block,
     * and all the pages in the last block should have been sent out.
     * Keeping this order is important, because the 'cont' flag is used
     * to avoid resending the block name.
     *
     * We post the first page as a normal page as compression will
     * take a lot of CPU resource.
     */
    if (block != rs->last_sent_block) {
        flush_compressed_data(rs);
        return false;
    }

    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
        return true;
    }

    compression_counters.busy++;
    return false;
}
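
/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */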
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
                                bool last_stage)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
    int res;

    if (control_save_page(rs, block, offset, &res)) {
        return res;
    }

    if (save_compress_page(rs, block, offset)) {
        return 1;
    }

    res = save_zero_page(rs, block, offset);
    if (res > 0) {
        /*
         * Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale.
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        }
        ram_release_pages(block->idstr, offset, res);
        return res;
    }

    /*
     * Do not use multifd for compression as the first page in the new
     * block should be posted out before sending the compressed page.
     */
    if (!save_page_use_compression(rs) && migrate_use_multifd()) {
        return ram_save_multifd_page(rs, block, offset);
    }

    return ram_save_page(rs, pss, last_stage);
}
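
/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at the current page of @pss, send pages up to the end of
 * the current host page.  It's valid for the initial offset to point
 * into the middle of a host page, in which case the remainder of the
 * host page is sent.  Only dirty target pages are sent.  Note that
 * the host page size may be a huge page for this block.  The saving
 * stops at the boundary of the used_length of the block if the
 * RAMBlock isn't a multiple of the host page size.
 *
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */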
2593static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
2594 bool last_stage)
2595{
2596 int tmppages, pages = 0;
2597 size_t pagesize_bits =
2598 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2599
2600 if (ramblock_is_ignored(pss->block)) {
2601 error_report("block %s should not be migrated !", pss->block->idstr);
2602 return 0;
2603 }
2604
2605 do {
2606
2607 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2608 pss->page++;
2609 continue;
2610 }
2611
2612 tmppages = ram_save_target_page(rs, pss, last_stage);
2613 if (tmppages < 0) {
2614 return tmppages;
2615 }
2616
2617 pages += tmppages;
2618 pss->page++;
2619 } while ((pss->page & (pagesize_bits - 1)) &&
2620 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
2621
2622
2623 pss->page--;
2624 return pages;
2625}
2626
/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns the number of pages written where zero means no dirty pages,
 * or negative on error
 *
 * @rs: current RAM state
 * @last_stage: if we are at the completion stage
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */
static int ram_find_and_save_block(RAMState *rs, bool last_stage)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {
        return pages;
    }

    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(rs, &pss);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(rs, &pss, &again);
        }

        if (found) {
            pages = ram_save_host_page(rs, &pss, last_stage);
        }
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;

    return pages;
}
2680
2681void acct_update_position(QEMUFile *f, size_t size, bool zero)
2682{
2683 uint64_t pages = size / TARGET_PAGE_SIZE;
2684
2685 if (zero) {
2686 ram_counters.duplicate += pages;
2687 } else {
2688 ram_counters.normal += pages;
2689 ram_counters.transferred += size;
2690 qemu_update_position(f, size);
2691 }
2692}
2693
2694static uint64_t ram_bytes_total_common(bool count_ignored)
2695{
2696 RAMBlock *block;
2697 uint64_t total = 0;
2698
2699 RCU_READ_LOCK_GUARD();
2700
2701 if (count_ignored) {
2702 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2703 total += block->used_length;
2704 }
2705 } else {
2706 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2707 total += block->used_length;
2708 }
2709 }
2710 return total;
2711}
2712
2713uint64_t ram_bytes_total(void)
2714{
2715 return ram_bytes_total_common(false);
2716}
2717
2718static void xbzrle_load_setup(void)
2719{
2720 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2721}
2722
2723static void xbzrle_load_cleanup(void)
2724{
2725 g_free(XBZRLE.decoded_buf);
2726 XBZRLE.decoded_buf = NULL;
2727}
2728
2729static void ram_state_cleanup(RAMState **rsp)
2730{
2731 if (*rsp) {
2732 migration_page_queue_free(*rsp);
2733 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2734 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2735 g_free(*rsp);
2736 *rsp = NULL;
2737 }
2738}
2739
2740static void xbzrle_cleanup(void)
2741{
2742 XBZRLE_cache_lock();
2743 if (XBZRLE.cache) {
2744 cache_fini(XBZRLE.cache);
2745 g_free(XBZRLE.encoded_buf);
2746 g_free(XBZRLE.current_buf);
2747 g_free(XBZRLE.zero_target_page);
2748 XBZRLE.cache = NULL;
2749 XBZRLE.encoded_buf = NULL;
2750 XBZRLE.current_buf = NULL;
2751 XBZRLE.zero_target_page = NULL;
2752 }
2753 XBZRLE_cache_unlock();
2754}
2755
2756static void ram_save_cleanup(void *opaque)
2757{
2758 RAMState **rsp = opaque;
2759 RAMBlock *block;
2760
2761
2762
2763
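    /*
     * The caller holds the iothread lock or runs in a bottom half, so
     * there is no writer racing with us on the migration bitmap.
     */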
2764 memory_global_dirty_log_stop();
2765
2766 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2767 g_free(block->clear_bmap);
2768 block->clear_bmap = NULL;
2769 g_free(block->bmap);
2770 block->bmap = NULL;
2771 }
2772
2773 xbzrle_cleanup();
2774 compress_threads_save_cleanup();
2775 ram_state_cleanup(rsp);
2776}
2777
2778static void ram_state_reset(RAMState *rs)
2779{
2780 rs->last_seen_block = NULL;
2781 rs->last_sent_block = NULL;
2782 rs->last_page = 0;
2783 rs->last_version = ram_list.version;
2784 rs->ram_bulk_stage = true;
2785 rs->fpo_enabled = false;
2786}
2787
#define MAX_WAIT 50 /* ms, cap on how long one ram_save_iterate round may run */
2789
2790
2791
2792
2793
2794
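/**
 * ram_debug_dump_bitmap: dump a page bitmap to stderr
 *
 * Only lines containing at least one bit that differs from @expected
 * are printed, so pass the value the bitmap is mostly full of.
 *
 * @todump: bitmap to dump
 * @expected: the value most bits are expected to have
 * @pages: number of pages (bits) in the bitmap
 */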
2795void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2796 unsigned long pages)
2797{
2798 int64_t cur;
2799 int64_t linelen = 128;
2800 char linebuf[129];
2801
2802 for (cur = 0; cur < pages; cur += linelen) {
2803 int64_t curb;
2804 bool found = false;
2805
2806
2807
2808
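        /* last line; catch the case where the line length is longer
         * than the remaining ram
         */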
2809 if (cur + linelen > pages) {
2810 linelen = pages - cur;
2811 }
2812 for (curb = 0; curb < linelen; curb++) {
2813 bool thisbit = test_bit(cur + curb, todump);
2814 linebuf[curb] = thisbit ? '1' : '.';
2815 found = found || (thisbit != expected);
2816 }
2817 if (found) {
2818 linebuf[curb] = '\0';
2819 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2820 }
2821 }
2822}
2823
2824
2825
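/*
 * ram_postcopy_migrated_memory_release: discard, on the source, the RAM
 * pages that have already been sent during postcopy (the clear bits of
 * each block's bitmap), so that their memory can be released.
 */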
2826void ram_postcopy_migrated_memory_release(MigrationState *ms)
2827{
2828 struct RAMBlock *block;
2829
2830 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2831 unsigned long *bitmap = block->bmap;
2832 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2833 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2834
2835 while (run_start < range) {
2836 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
2837 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
2838 (run_end - run_start) << TARGET_PAGE_BITS);
2839 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2840 }
2841 }
2842}
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
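/**
 * postcopy_send_discard_bm_ram: discard a RAMBlock
 *
 * Returns zero on success
 *
 * Callback from postcopy_each_ram_send_discard for each RAMBlock
 *
 * @ms: current migration state
 * @block: RAMBlock to discard
 */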
2854static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
2855{
2856 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2857 unsigned long current;
2858 unsigned long *bitmap = block->bmap;
2859
2860 for (current = 0; current < end; ) {
2861 unsigned long one = find_next_bit(bitmap, end, current);
2862 unsigned long zero, discard_length;
2863
2864 if (one >= end) {
2865 break;
2866 }
2867
2868 zero = find_next_zero_bit(bitmap, end, one + 1);
2869
2870 if (zero >= end) {
2871 discard_length = end - one;
2872 } else {
2873 discard_length = zero - one;
2874 }
2875 postcopy_discard_send_range(ms, one, discard_length);
2876 current = one + discard_length;
2877 }
2878
2879 return 0;
2880}
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
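/**
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 *
 * Returns 0 for success or negative for error
 *
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 *
 * @ms: current migration state
 */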
2894
2895static int postcopy_each_ram_send_discard(MigrationState *ms)
2896{
2897 struct RAMBlock *block;
2898 int ret;
2899
2900 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2901 postcopy_discard_send_init(ms, block->idstr);
2902
2903
2904
2905
2906
2907
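        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */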
2908 ret = postcopy_send_discard_bm_ram(ms, block);
2909 postcopy_discard_send_finish(ms);
2910 if (ret) {
2911 return ret;
2912 }
2913 }
2914
2915 return 0;
2916}
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
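/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix.  This function canonicalizes the bitmap: any host
 * page that is partially dirty is marked fully dirty.
 *
 * @ms: current migration state
 * @block: block that contains the page we want to canonicalize
 */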
2931static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
2932{
2933 RAMState *rs = ram_state;
2934 unsigned long *bitmap = block->bmap;
2935 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
2936 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2937 unsigned long run_start;
2938
2939 if (block->page_size == TARGET_PAGE_SIZE) {
2940
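        /* Easy case - TPS==HPS for a non-huge page - nothing to do */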
2941 return;
2942 }
2943
2944
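    /* Find a dirty page */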
2945 run_start = find_next_bit(bitmap, pages, 0);
2946
2947 while (run_start < pages) {
2948
2949
2950
2951
2952
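        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */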
2953 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
2954
2955 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
2956
2957
2958
2959
2960
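            /*
             * If the run of dirty pages ends mid host page, the
             * alignment check below picks it up and dirties the rest
             * of that host page.
             */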
2961 }
2962
2963 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
2964 unsigned long page;
2965 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2966 host_ratio);
2967 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
2968
2969
2970 for (page = fixup_start_addr;
2971 page < fixup_start_addr + host_ratio; page++) {
2972
2973
2974
2975
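                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */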
2976 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2977 }
2978 }
2979
2980
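        /* Find the next dirty page for the next iteration */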
2981 run_start = find_next_bit(bitmap, pages, run_start);
2982 }
2983}
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
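/**
 * postcopy_chunk_hostpages: discard any partially sent host page
 *
 * Utility for the outgoing postcopy code.
 *
 * Mark any partially dirty host-page size chunks as all dirty.  In this
 * case the host-page is the host-page for the particular RAMBlock, i.e.
 * it might be a huge page.
 *
 * Returns zero on success
 *
 * @ms: current migration state
 * @block: block we want to work with
 */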
2999static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
3000{
3001 postcopy_discard_send_init(ms, block->idstr);
3002
3003
3004
3005
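    /* Ensure that all partially dirty host pages are made fully dirty */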
3006 postcopy_chunk_hostpages_pass(ms, block);
3007
3008 postcopy_discard_send_finish(ms);
3009 return 0;
3010}
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
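/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Returns zero on success
 *
 * Transmit the set of pages to be discarded after precopy to the target
 * these are pages that:
 *     a) Have been previously transmitted but are now dirty again
 *     b) Pages that have never been transmitted, this ensures that
 *        any pages on the destination that have been mapped by background
 *        tasks get discarded (transparent huge pages is a specific concern)
 * Hopefully this is pretty sparse
 *
 * @ms: current migration state
 */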
3027int ram_postcopy_send_discard_bitmap(MigrationState *ms)
3028{
3029 RAMState *rs = ram_state;
3030 RAMBlock *block;
3031 int ret;
3032
3033 RCU_READ_LOCK_GUARD();
3034
3035
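    /* This should be our last sync, the src is now paused */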
3036 migration_bitmap_sync(rs);
3037
3038
3039 rs->last_seen_block = NULL;
3040 rs->last_sent_block = NULL;
3041 rs->last_page = 0;
3042
3043 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3044
3045 ret = postcopy_chunk_hostpages(ms, block);
3046 if (ret) {
3047 return ret;
3048 }
3049
3050#ifdef DEBUG_POSTCOPY
3051 ram_debug_dump_bitmap(block->bmap, true,
3052 block->used_length >> TARGET_PAGE_BITS);
3053#endif
3054 }
3055 trace_ram_postcopy_send_discard_bitmap();
3056
3057 ret = postcopy_each_ram_send_discard(ms);
3058
3059 return ret;
3060}
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
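/**
 * ram_discard_range: discard dirtied pages at the beginning of postcopy
 *
 * Returns zero on success
 *
 * @rbname: name of the RAMBlock of the request. NULL means the
 *          same that last one.
 * @start: RAMBlock starting page
 * @length: RAMBlock size
 */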
3072int ram_discard_range(const char *rbname, uint64_t start, size_t length)
3073{
3074 int ret = -1;
3075
3076 trace_ram_discard_range(rbname, start, length);
3077
3078 RCU_READ_LOCK_GUARD();
3079 RAMBlock *rb = qemu_ram_block_by_name(rbname);
3080
3081 if (!rb) {
3082 error_report("ram_discard_range: Failed to find block '%s'", rbname);
3083 goto err;
3084 }
3085
3086
3087
3088
3089
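    /*
     * On source VM, we don't need to update the received bitmap since
     * we don't even have one.
     */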
3090 if (rb->receivedmap) {
3091 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
3092 length >> qemu_target_page_bits());
3093 }
3094
3095 ret = ram_block_discard_range(rb, start, length);
3096
3097err:
3098 return ret;
3099}
3100
3101
3102
3103
3104
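/*
 * For every allocation, we will try not to crash the VM if the
 * allocation failed.
 */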
3105static int xbzrle_init(void)
3106{
3107 Error *local_err = NULL;
3108
3109 if (!migrate_use_xbzrle()) {
3110 return 0;
3111 }
3112
3113 XBZRLE_cache_lock();
3114
3115 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
3116 if (!XBZRLE.zero_target_page) {
3117 error_report("%s: Error allocating zero page", __func__);
3118 goto err_out;
3119 }
3120
3121 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
3122 TARGET_PAGE_SIZE, &local_err);
3123 if (!XBZRLE.cache) {
3124 error_report_err(local_err);
3125 goto free_zero_page;
3126 }
3127
3128 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
3129 if (!XBZRLE.encoded_buf) {
3130 error_report("%s: Error allocating encoded_buf", __func__);
3131 goto free_cache;
3132 }
3133
3134 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
3135 if (!XBZRLE.current_buf) {
3136 error_report("%s: Error allocating current_buf", __func__);
3137 goto free_encoded_buf;
3138 }
3139
3140
3141 XBZRLE_cache_unlock();
3142 return 0;
3143
3144free_encoded_buf:
3145 g_free(XBZRLE.encoded_buf);
3146 XBZRLE.encoded_buf = NULL;
3147free_cache:
3148 cache_fini(XBZRLE.cache);
3149 XBZRLE.cache = NULL;
3150free_zero_page:
3151 g_free(XBZRLE.zero_target_page);
3152 XBZRLE.zero_target_page = NULL;
3153err_out:
3154 XBZRLE_cache_unlock();
3155 return -ENOMEM;
3156}
3157
3158static int ram_state_init(RAMState **rsp)
3159{
3160 *rsp = g_try_new0(RAMState, 1);
3161
3162 if (!*rsp) {
3163 error_report("%s: Init ramstate fail", __func__);
3164 return -1;
3165 }
3166
3167 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3168 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3169 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
3170
3171
3172
3173
3174
3175
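    /*
     * Count the total number of pages used by ram blocks not including
     * any gaps due to alignment or unplugs.
     * This must match with the initial values of the dirty bitmap.
     */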
3176 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
3177 ram_state_reset(*rsp);
3178
3179 return 0;
3180}
3181
3182static void ram_list_init_bitmaps(void)
3183{
3184 MigrationState *ms = migrate_get_current();
3185 RAMBlock *block;
3186 unsigned long pages;
3187 uint8_t shift;
3188
3189
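    /* Skip setting bitmap if there is no RAM */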
3190 if (ram_bytes_total()) {
3191 shift = ms->clear_bitmap_shift;
3192 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
3193 error_report("clear_bitmap_shift (%u) too big, using "
3194 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
3195 shift = CLEAR_BITMAP_SHIFT_MAX;
3196 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
3197 error_report("clear_bitmap_shift (%u) too small, using "
3198 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
3199 shift = CLEAR_BITMAP_SHIFT_MIN;
3200 }
3201
3202 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3203 pages = block->max_length >> TARGET_PAGE_BITS;
3204
3205
3206
3207
3208
3209
3210
3211
3212
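            /*
             * The initial dirty bitmap for migration must be set with all
             * ones to make sure we'll migrate every guest RAM page to
             * destination.
             * Here we set RAMBlock.bmap all to 1 because when rebegin a
             * new migration after a failed migration, ram_list.
             * dirty_memory[DIRTY_MEMORY_MIGRATION] won't include the whole
             * guest memory.
             */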
3213 block->bmap = bitmap_new(pages);
3214 bitmap_set(block->bmap, 0, pages);
3215 block->clear_bmap_shift = shift;
3216 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
3217 }
3218 }
3219}
3220
3221static void ram_init_bitmaps(RAMState *rs)
3222{
3223
3224 qemu_mutex_lock_iothread();
3225 qemu_mutex_lock_ramlist();
3226
3227 WITH_RCU_READ_LOCK_GUARD() {
3228 ram_list_init_bitmaps();
3229 memory_global_dirty_log_start();
3230 migration_bitmap_sync_precopy(rs);
3231 }
3232 qemu_mutex_unlock_ramlist();
3233 qemu_mutex_unlock_iothread();
3234}
3235
3236static int ram_init_all(RAMState **rsp)
3237{
3238 if (ram_state_init(rsp)) {
3239 return -1;
3240 }
3241
3242 if (xbzrle_init()) {
3243 ram_state_cleanup(rsp);
3244 return -1;
3245 }
3246
3247 ram_init_bitmaps(*rsp);
3248
3249 return 0;
3250}
3251
3252static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3253{
3254 RAMBlock *block;
3255 uint64_t pages = 0;
3256
3257
3258
3259
3260
3261
3262
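    /*
     * Postcopy is not using xbzrle/compression, so no need for that.
     * Also, since the source is already halted, we don't need to care
     * about dirty page logging as well.
     */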
3263 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3264 pages += bitmap_count_one(block->bmap,
3265 block->used_length >> TARGET_PAGE_BITS);
3266 }
3267
3268
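    /* This may not be aligned with current bitmaps. Recalculate. */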
3269 rs->migration_dirty_pages = pages;
3270
3271 rs->last_seen_block = NULL;
3272 rs->last_sent_block = NULL;
3273 rs->last_page = 0;
3274 rs->last_version = ram_list.version;
3275
3276
3277
3278
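    /*
     * Disable the bulk stage, otherwise we'll resend the whole RAM no
     * matter what we have sent.
     */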
3279 rs->ram_bulk_stage = false;
3280
3281
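    /* Update RAMState cache of output QEMUFile */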
3282 rs->f = out;
3283
3284 trace_ram_state_resume_prepare(pages);
3285}
3286
3287
3288
3289
3290
3291
3292
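/*
 * This function clears bits of the free pages reported by the caller from the
 * migration dirty bitmap. @addr is the host address corresponding to the
 * start of the continuous guest free pages, and @len is the total bytes of
 * those pages.
 */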
3293void qemu_guest_free_page_hint(void *addr, size_t len)
3294{
3295 RAMBlock *block;
3296 ram_addr_t offset;
3297 size_t used_len, start, npages;
3298 MigrationState *s = migrate_get_current();
3299
3300
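    /* This function is currently expected to be used during live migration */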
3301 if (!migration_is_setup_or_active(s->state)) {
3302 return;
3303 }
3304
3305 for (; len > 0; len -= used_len, addr += used_len) {
3306 block = qemu_ram_block_from_host(addr, false, &offset);
3307 if (unlikely(!block || offset >= block->used_length)) {
3308
3309
3310
3311
3312
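            /*
             * The implementation might not support RAMBlock resize during
             * live migration, but it could happen in theory with future
             * updates. So we add a check here to capture that case.
             */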
3313 error_report_once("%s unexpected error", __func__);
3314 return;
3315 }
3316
3317 if (len <= block->used_length - offset) {
3318 used_len = len;
3319 } else {
3320 used_len = block->used_length - offset;
3321 }
3322
3323 start = offset >> TARGET_PAGE_BITS;
3324 npages = used_len >> TARGET_PAGE_BITS;
3325
3326 qemu_mutex_lock(&ram_state->bitmap_mutex);
3327 ram_state->migration_dirty_pages -=
3328 bitmap_count_one_with_offset(block->bmap, start, npages);
3329 bitmap_clear(block->bmap, start, npages);
3330 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3331 }
3332}
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
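/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */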
3349static int ram_save_setup(QEMUFile *f, void *opaque)
3350{
3351 RAMState **rsp = opaque;
3352 RAMBlock *block;
3353
3354 if (compress_threads_save_setup()) {
3355 return -1;
3356 }
3357
3358
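    /* migration has already setup the bitmap, reuse it. */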
3359 if (!migration_in_colo_state()) {
3360 if (ram_init_all(rsp) != 0) {
3361 compress_threads_save_cleanup();
3362 return -1;
3363 }
3364 }
3365 (*rsp)->f = f;
3366
3367 WITH_RCU_READ_LOCK_GUARD() {
3368 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
3369
3370 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3371 qemu_put_byte(f, strlen(block->idstr));
3372 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3373 qemu_put_be64(f, block->used_length);
3374 if (migrate_postcopy_ram() && block->page_size !=
3375 qemu_host_page_size) {
3376 qemu_put_be64(f, block->page_size);
3377 }
3378 if (migrate_ignore_shared()) {
3379 qemu_put_be64(f, block->mr->addr);
3380 }
3381 }
3382 }
3383
3384 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3385 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3386
3387 multifd_send_sync_main(*rsp);
3388 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3389 qemu_fflush(f);
3390
3391 return 0;
3392}
3393
3394
3395
3396
3397
3398
3399
3400
3401
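/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */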
3402static int ram_save_iterate(QEMUFile *f, void *opaque)
3403{
3404 RAMState **temp = opaque;
3405 RAMState *rs = *temp;
3406 int ret;
3407 int i;
3408 int64_t t0;
3409 int done = 0;
3410
3411 if (blk_mig_bulk_active()) {
3412
3413
3414
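        /*
         * Avoid transferring RAM during the bulk phase of block migration:
         * the bulk phase usually takes a long time and transferring RAM
         * updates during that time is pointless.
         */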
3415 goto out;
3416 }
3417
3418 WITH_RCU_READ_LOCK_GUARD() {
3419 if (ram_list.version != rs->last_version) {
3420 ram_state_reset(rs);
3421 }
3422
3423
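        /* Read version before ram_list.blocks */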
3424 smp_rmb();
3425
3426 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3427
3428 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3429 i = 0;
3430 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3431 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
3432 int pages;
3433
3434 if (qemu_file_get_error(f)) {
3435 break;
3436 }
3437
3438 pages = ram_find_and_save_block(rs, false);
3439
3440 if (pages == 0) {
3441 done = 1;
3442 break;
3443 }
3444
3445 if (pages < 0) {
3446 qemu_file_set_error(f, pages);
3447 break;
3448 }
3449
3450 rs->target_page_count += pages;
3451
3452
3453
3454
3455
3456
3457
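            /*
             * We want to check in the 1st loop, just in case it was the 1st
             * time and we had to sync the dirty bitmap.
             * qemu_clock_get_ns() is a bit expensive, so we only check once
             * in a while.
             */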
3458 if ((i & 63) == 0) {
3459 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
3460 1000000;
3461 if (t1 > MAX_WAIT) {
3462 trace_ram_save_iterate_big_wait(t1, i);
3463 break;
3464 }
3465 }
3466 i++;
3467 }
3468 }
3469
3470
3471
3472
3473
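    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */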
3474 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3475
3476out:
3477 multifd_send_sync_main(rs);
3478 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3479 qemu_fflush(f);
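    /* Account for the 8-byte RAM_SAVE_FLAG_EOS marker just written */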
3480 ram_counters.transferred += 8;
3481
3482 ret = qemu_file_get_error(f);
3483 if (ret < 0) {
3484 return ret;
3485 }
3486
3487 return done;
3488}
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
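/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success or negative on error
 *
 * Called with iothread lock
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */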
3500static int ram_save_complete(QEMUFile *f, void *opaque)
3501{
3502 RAMState **temp = opaque;
3503 RAMState *rs = *temp;
3504 int ret = 0;
3505
3506 WITH_RCU_READ_LOCK_GUARD() {
3507 if (!migration_in_postcopy()) {
3508 migration_bitmap_sync_precopy(rs);
3509 }
3510
3511 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3512
3513
3514
3515
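        /* flush all remaining blocks regardless of rate limiting */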
3516 while (true) {
3517 int pages;
3518
3519 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
3520
3521 if (pages == 0) {
3522 break;
3523 }
3524 if (pages < 0) {
3525 ret = pages;
3526 break;
3527 }
3528 }
3529
3530 flush_compressed_data(rs);
3531 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
3532 }
3533
3534 multifd_send_sync_main(rs);
3535 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3536 qemu_fflush(f);
3537
3538 return ret;
3539}
3540
3541static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
3542 uint64_t *res_precopy_only,
3543 uint64_t *res_compatible,
3544 uint64_t *res_postcopy_only)
3545{
3546 RAMState **temp = opaque;
3547 RAMState *rs = *temp;
3548 uint64_t remaining_size;
3549
3550 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3551
3552 if (!migration_in_postcopy() &&
3553 remaining_size < max_size) {
3554 qemu_mutex_lock_iothread();
3555 WITH_RCU_READ_LOCK_GUARD() {
3556 migration_bitmap_sync_precopy(rs);
3557 }
3558 qemu_mutex_unlock_iothread();
3559 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3560 }
3561
3562 if (migrate_postcopy_ram()) {
3563
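        /* We can do postcopy, and all the data is postcopiable */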
3564 *res_compatible += remaining_size;
3565 } else {
3566 *res_precopy_only += remaining_size;
3567 }
3568}
3569
3570static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3571{
3572 unsigned int xh_len;
3573 int xh_flags;
3574 uint8_t *loaded_data;
3575
3576
3577 xh_flags = qemu_get_byte(f);
3578 xh_len = qemu_get_be16(f);
3579
3580 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3581 error_report("Failed to load XBZRLE page - wrong compression!");
3582 return -1;
3583 }
3584
3585 if (xh_len > TARGET_PAGE_SIZE) {
3586 error_report("Failed to load XBZRLE page - len overflow!");
3587 return -1;
3588 }
3589 loaded_data = XBZRLE.decoded_buf;
3590
3591
3592 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
3593
3594
3595 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
3596 TARGET_PAGE_SIZE) == -1) {
3597 error_report("Failed to load XBZRLE page - decode error!");
3598 return -1;
3599 }
3600
3601 return 0;
3602}
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
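/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */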
3614static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
3615{
3616 static RAMBlock *block = NULL;
3617 char id[256];
3618 uint8_t len;
3619
3620 if (flags & RAM_SAVE_FLAG_CONTINUE) {
3621 if (!block) {
3622 error_report("Ack, bad migration stream!");
3623 return NULL;
3624 }
3625 return block;
3626 }
3627
3628 len = qemu_get_byte(f);
3629 qemu_get_buffer(f, (uint8_t *)id, len);
3630 id[len] = 0;
3631
3632 block = qemu_ram_block_by_name(id);
3633 if (!block) {
3634 error_report("Can't find block %s", id);
3635 return NULL;
3636 }
3637
3638 if (ramblock_is_ignored(block)) {
        error_report("block %s should not be migrated!", id);
3640 return NULL;
3641 }
3642
3643 return block;
3644}
3645
3646static inline void *host_from_ram_block_offset(RAMBlock *block,
3647 ram_addr_t offset)
3648{
3649 if (!offset_in_ramblock(block, offset)) {
3650 return NULL;
3651 }
3652
3653 return block->host + offset;
3654}
3655
3656static inline void *colo_cache_from_block_offset(RAMBlock *block,
3657 ram_addr_t offset)
3658{
3659 if (!offset_in_ramblock(block, offset)) {
3660 return NULL;
3661 }
3662 if (!block->colo_cache) {
3663 error_report("%s: colo_cache is NULL in block :%s",
3664 __func__, block->idstr);
3665 return NULL;
3666 }
3667
3668
3669
3670
3671
3672
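    /*
     * During colo checkpoint, we need a bitmap of these migrated pages.
     * It helps us to decide which pages in the ram cache should be
     * flushed into the VM's RAM later.
     */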
3673 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3674 ram_state->migration_dirty_pages++;
3675 }
3676 return block->colo_cache + offset;
3677}
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
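/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */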
3689void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3690{
3691 if (ch != 0 || !is_zero_range(host, size)) {
3692 memset(host, ch, size);
3693 }
3694}
3695
3696
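/* return the size after decompression, or negative value on error */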
3697static int
3698qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3699 const uint8_t *source, size_t source_len)
3700{
3701 int err;
3702
3703 err = inflateReset(stream);
3704 if (err != Z_OK) {
3705 return -1;
3706 }
3707
3708 stream->avail_in = source_len;
3709 stream->next_in = (uint8_t *)source;
3710 stream->avail_out = dest_len;
3711 stream->next_out = dest;
3712
3713 err = inflate(stream, Z_NO_FLUSH);
3714 if (err != Z_STREAM_END) {
3715 return -1;
3716 }
3717
3718 return stream->total_out;
3719}
3720
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0 && migrate_get_current()->decompress_error_check) {
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
3759
3760static int wait_for_decompress_done(void)
3761{
3762 int idx, thread_count;
3763
3764 if (!migrate_use_compression()) {
3765 return 0;
3766 }
3767
3768 thread_count = migrate_decompress_threads();
3769 qemu_mutex_lock(&decomp_done_lock);
3770 for (idx = 0; idx < thread_count; idx++) {
3771 while (!decomp_param[idx].done) {
3772 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3773 }
3774 }
3775 qemu_mutex_unlock(&decomp_done_lock);
3776 return qemu_file_get_error(decomp_file);
3777}
3778
3779static void compress_threads_load_cleanup(void)
3780{
3781 int i, thread_count;
3782
3783 if (!migrate_use_compression()) {
3784 return;
3785 }
3786 thread_count = migrate_decompress_threads();
3787 for (i = 0; i < thread_count; i++) {
3788
3789
3790
3791
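        /*
         * We use compbuf as an indicator of whether the thread was
         * properly initialized or not.
         */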
3792 if (!decomp_param[i].compbuf) {
3793 break;
3794 }
3795
3796 qemu_mutex_lock(&decomp_param[i].mutex);
3797 decomp_param[i].quit = true;
3798 qemu_cond_signal(&decomp_param[i].cond);
3799 qemu_mutex_unlock(&decomp_param[i].mutex);
3800 }
3801 for (i = 0; i < thread_count; i++) {
3802 if (!decomp_param[i].compbuf) {
3803 break;
3804 }
3805
3806 qemu_thread_join(decompress_threads + i);
3807 qemu_mutex_destroy(&decomp_param[i].mutex);
3808 qemu_cond_destroy(&decomp_param[i].cond);
3809 inflateEnd(&decomp_param[i].stream);
3810 g_free(decomp_param[i].compbuf);
3811 decomp_param[i].compbuf = NULL;
3812 }
3813 g_free(decompress_threads);
3814 g_free(decomp_param);
3815 decompress_threads = NULL;
3816 decomp_param = NULL;
3817 decomp_file = NULL;
3818}
3819
3820static int compress_threads_load_setup(QEMUFile *f)
3821{
3822 int i, thread_count;
3823
3824 if (!migrate_use_compression()) {
3825 return 0;
3826 }
3827
3828 thread_count = migrate_decompress_threads();
3829 decompress_threads = g_new0(QemuThread, thread_count);
3830 decomp_param = g_new0(DecompressParam, thread_count);
3831 qemu_mutex_init(&decomp_done_lock);
3832 qemu_cond_init(&decomp_done_cond);
3833 decomp_file = f;
3834 for (i = 0; i < thread_count; i++) {
3835 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3836 goto exit;
3837 }
3838
3839 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3840 qemu_mutex_init(&decomp_param[i].mutex);
3841 qemu_cond_init(&decomp_param[i].cond);
3842 decomp_param[i].done = true;
3843 decomp_param[i].quit = false;
3844 qemu_thread_create(decompress_threads + i, "decompress",
3845 do_data_decompress, decomp_param + i,
3846 QEMU_THREAD_JOINABLE);
3847 }
3848 return 0;
3849exit:
3850 compress_threads_load_cleanup();
3851 return -1;
3852}
3853
3854static void decompress_data_with_multi_threads(QEMUFile *f,
3855 void *host, int len)
3856{
3857 int idx, thread_count;
3858
3859 thread_count = migrate_decompress_threads();
3860 qemu_mutex_lock(&decomp_done_lock);
3861 while (true) {
3862 for (idx = 0; idx < thread_count; idx++) {
3863 if (decomp_param[idx].done) {
3864 decomp_param[idx].done = false;
3865 qemu_mutex_lock(&decomp_param[idx].mutex);
3866 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
3867 decomp_param[idx].des = host;
3868 decomp_param[idx].len = len;
3869 qemu_cond_signal(&decomp_param[idx].cond);
3870 qemu_mutex_unlock(&decomp_param[idx].mutex);
3871 break;
3872 }
3873 }
3874 if (idx < thread_count) {
3875 break;
3876 } else {
3877 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3878 }
3879 }
3880 qemu_mutex_unlock(&decomp_done_lock);
3881}
3882
3883
3884
3885
3886
3887
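/*
 * colo cache: this is for secondary VM, we cache the whole
 * memory of the secondary VM. The global lock must be held
 * when calling this helper.
 */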
3888int colo_init_ram_cache(void)
3889{
3890 RAMBlock *block;
3891
3892 rcu_read_lock();
3893 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3894 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3895 NULL,
3896 false);
3897 if (!block->colo_cache) {
            error_report("%s: Can't alloc memory for COLO cache of block %s, "
                         "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
                         block->used_length);
3901 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3902 if (block->colo_cache) {
3903 qemu_anon_ram_free(block->colo_cache, block->used_length);
3904 block->colo_cache = NULL;
3905 }
3906 }
3907 return -errno;
3908 }
3909 memcpy(block->colo_cache, block->host, block->used_length);
3910 }
3911 rcu_read_unlock();
3912
3913
3914
3915
3916
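    /*
     * Record the dirty pages that were sent by the PVM; this bitmap is
     * used later to decide which pages in the COLO cache need to be
     * flushed into the VM's RAM.
     */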
3917 if (ram_bytes_total()) {
3918 RAMBlock *block;
3919
3920 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3921 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3922
3923 block->bmap = bitmap_new(pages);
3924 bitmap_set(block->bmap, 0, pages);
3925 }
3926 }
3927 ram_state = g_new0(RAMState, 1);
3928 ram_state->migration_dirty_pages = 0;
3929 qemu_mutex_init(&ram_state->bitmap_mutex);
3930 memory_global_dirty_log_start();
3931
3932 return 0;
3933}
3934
3935
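/* The global lock must be held when calling this helper */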
3936void colo_release_ram_cache(void)
3937{
3938 RAMBlock *block;
3939
3940 memory_global_dirty_log_stop();
3941 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3942 g_free(block->bmap);
3943 block->bmap = NULL;
3944 }
3945
3946 WITH_RCU_READ_LOCK_GUARD() {
3947 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3948 if (block->colo_cache) {
3949 qemu_anon_ram_free(block->colo_cache, block->used_length);
3950 block->colo_cache = NULL;
3951 }
3952 }
3953 }
3954 qemu_mutex_destroy(&ram_state->bitmap_mutex);
3955 g_free(ram_state);
3956 ram_state = NULL;
3957}
3958
3959
3960
3961
3962
3963
3964
3965
3966
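/**
 * ram_load_setup: Setup RAM for migration incoming side
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 */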
3967static int ram_load_setup(QEMUFile *f, void *opaque)
3968{
3969 if (compress_threads_load_setup(f)) {
3970 return -1;
3971 }
3972
3973 xbzrle_load_setup();
3974 ramblock_recv_map_init();
3975
3976 return 0;
3977}
3978
3979static int ram_load_cleanup(void *opaque)
3980{
3981 RAMBlock *rb;
3982
3983 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3984 if (ramblock_is_pmem(rb)) {
3985 pmem_persist(rb->host, rb->used_length);
3986 }
3987 }
3988
3989 xbzrle_load_cleanup();
3990 compress_threads_load_cleanup();
3991
3992 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3993 g_free(rb->receivedmap);
3994 rb->receivedmap = NULL;
3995 }
3996
3997 return 0;
3998}
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
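/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram. postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */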
4011int ram_postcopy_incoming_init(MigrationIncomingState *mis)
4012{
4013 return postcopy_ram_incoming_init(mis);
4014}
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
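/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to send the data
 */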
4026static int ram_load_postcopy(QEMUFile *f)
4027{
4028 int flags = 0, ret = 0;
4029 bool place_needed = false;
4030 bool matches_target_page_size = false;
4031 MigrationIncomingState *mis = migration_incoming_get_current();
4032
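    /* Temporary page that is later 'placed' */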
4033 void *postcopy_host_page = mis->postcopy_tmp_page;
4034 void *last_host = NULL;
4035 bool all_zero = false;
4036
4037 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4038 ram_addr_t addr;
4039 void *host = NULL;
4040 void *page_buffer = NULL;
4041 void *place_source = NULL;
4042 RAMBlock *block = NULL;
4043 uint8_t ch;
4044
4045 addr = qemu_get_be64(f);
4046
4047
4048
4049
4050
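        /*
         * If qemu file error, we should stop here, and then "addr"
         * may be invalid
         */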
4051 ret = qemu_file_get_error(f);
4052 if (ret) {
4053 break;
4054 }
4055
4056 flags = addr & ~TARGET_PAGE_MASK;
4057 addr &= TARGET_PAGE_MASK;
4058
4059 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
4060 place_needed = false;
4061 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
4062 block = ram_block_from_stream(f, flags);
4063
4064 host = host_from_ram_block_offset(block, addr);
4065 if (!host) {
4066 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4067 ret = -EINVAL;
4068 break;
4069 }
4070 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
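            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */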
4081 page_buffer = postcopy_host_page +
4082 ((uintptr_t)host & (block->page_size - 1));
4083
4084 if (!((uintptr_t)host & (block->page_size - 1))) {
4085 all_zero = true;
4086 } else {
4087
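                /* not the 1st TP within the HP */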
4088 if (host != (last_host + TARGET_PAGE_SIZE)) {
4089 error_report("Non-sequential target page %p/%p",
4090 host, last_host);
4091 ret = -EINVAL;
4092 break;
4093 }
4094 }
4095
4096
4097
4098
4099
4100
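            /*
             * If it's the last part of a host page then we place the host
             * page
             */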
4101 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
4102 (block->page_size - 1)) == 0;
4103 place_source = postcopy_host_page;
4104 }
4105 last_host = host;
4106
4107 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4108 case RAM_SAVE_FLAG_ZERO:
4109 ch = qemu_get_byte(f);
4110 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4111 if (ch) {
4112 all_zero = false;
4113 }
4114 break;
4115
4116 case RAM_SAVE_FLAG_PAGE:
4117 all_zero = false;
4118 if (!matches_target_page_size) {
4119
4120 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4121 } else {
4122
4123
4124
4125
4126
4127
4128
4129
4130 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4131 TARGET_PAGE_SIZE);
4132 }
4133 break;
4134 case RAM_SAVE_FLAG_EOS:
4135
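            /* normal exit */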
4136 multifd_recv_sync_main();
4137 break;
4138 default:
4139 error_report("Unknown combination of migration flags: %#x"
4140 " (postcopy mode)", flags);
4141 ret = -EINVAL;
4142 break;
4143 }
4144
4145
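        /* Detect for any possible file errors */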
4146 if (!ret && qemu_file_get_error(f)) {
4147 ret = qemu_file_get_error(f);
4148 }
4149
4150 if (!ret && place_needed) {
4151
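            /* This gets called at the last target page in the host page */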
4152 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
4153
4154 if (all_zero) {
4155 ret = postcopy_place_page_zero(mis, place_dest,
4156 block);
4157 } else {
4158 ret = postcopy_place_page(mis, place_dest,
4159 place_source, block);
4160 }
4161 }
4162 }
4163
4164 return ret;
4165}
4166
4167static bool postcopy_is_advised(void)
4168{
4169 PostcopyState ps = postcopy_state_get();
4170 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4171}
4172
4173static bool postcopy_is_running(void)
4174{
4175 PostcopyState ps = postcopy_state_get();
4176 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4177}
4178
4179
4180
4181
4182
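/*
 * Flush the content of the RAM cache into the SVM's memory.
 * Only flush the pages that were dirtied by the PVM or SVM or both.
 */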
4183static void colo_flush_ram_cache(void)
4184{
4185 RAMBlock *block = NULL;
4186 void *dst_host;
4187 void *src_host;
4188 unsigned long offset = 0;
4189
4190 memory_global_dirty_log_sync();
4191 WITH_RCU_READ_LOCK_GUARD() {
4192 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4193 ramblock_sync_dirty_bitmap(ram_state, block);
4194 }
4195 }
4196
4197 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4198 WITH_RCU_READ_LOCK_GUARD() {
4199 block = QLIST_FIRST_RCU(&ram_list.blocks);
4200
4201 while (block) {
4202 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4203
4204 if (offset << TARGET_PAGE_BITS >= block->used_length) {
4205 offset = 0;
4206 block = QLIST_NEXT_RCU(block, next);
4207 } else {
4208 migration_bitmap_clear_dirty(ram_state, block, offset);
4209 dst_host = block->host + (offset << TARGET_PAGE_BITS);
4210 src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
4211 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4212 }
4213 }
4214 }
4215 trace_colo_flush_ram_cache_end();
4216}
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
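/**
 * ram_load_precopy: load pages in precopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in precopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to send the data
 */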
4228static int ram_load_precopy(QEMUFile *f)
4229{
4230 int flags = 0, ret = 0, invalid_flags = 0, len = 0;
4231
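    /* ADVISE is earlier, it shows the source has the postcopy capability on */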
4232 bool postcopy_advised = postcopy_is_advised();
4233 if (!migrate_use_compression()) {
4234 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4235 }
4236
4237 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4238 ram_addr_t addr, total_ram_bytes;
4239 void *host = NULL;
4240 uint8_t ch;
4241
4242 addr = qemu_get_be64(f);
4243 flags = addr & ~TARGET_PAGE_MASK;
4244 addr &= TARGET_PAGE_MASK;
4245
4246 if (flags & invalid_flags) {
4247 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4248 error_report("Received an unexpected compressed page");
4249 }
4250
4251 ret = -EINVAL;
4252 break;
4253 }
4254
4255 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
4256 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4257 RAMBlock *block = ram_block_from_stream(f, flags);
4258
4259
4260
4261
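            /*
             * After going into COLO, we should load the Page into colo_cache.
             */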
4262 if (migration_incoming_in_colo_state()) {
4263 host = colo_cache_from_block_offset(block, addr);
4264 } else {
4265 host = host_from_ram_block_offset(block, addr);
4266 }
4267 if (!host) {
4268 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4269 ret = -EINVAL;
4270 break;
4271 }
4272
4273 if (!migration_incoming_in_colo_state()) {
4274 ramblock_recv_bitmap_set(block, host);
4275 }
4276
4277 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
4278 }
4279
4280 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4281 case RAM_SAVE_FLAG_MEM_SIZE:
4282
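            /* Synchronize RAM block list */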
4283 total_ram_bytes = addr;
4284 while (!ret && total_ram_bytes) {
4285 RAMBlock *block;
4286 char id[256];
4287 ram_addr_t length;
4288
4289 len = qemu_get_byte(f);
4290 qemu_get_buffer(f, (uint8_t *)id, len);
4291 id[len] = 0;
4292 length = qemu_get_be64(f);
4293
4294 block = qemu_ram_block_by_name(id);
4295 if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated!", id);
4297 ret = -EINVAL;
4298 } else if (block) {
4299 if (length != block->used_length) {
4300 Error *local_err = NULL;
4301
4302 ret = qemu_ram_resize(block, length,
4303 &local_err);
4304 if (local_err) {
4305 error_report_err(local_err);
4306 }
4307 }
4308
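                    /* For postcopy we need to check hugepage sizes match */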
4309 if (postcopy_advised &&
4310 block->page_size != qemu_host_page_size) {
4311 uint64_t remote_page_size = qemu_get_be64(f);
4312 if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zu != %" PRIu64,
                                         id, block->page_size,
                                         remote_page_size);
4317 ret = -EINVAL;
4318 }
4319 }
4320 if (migrate_ignore_shared()) {
4321 hwaddr addr = qemu_get_be64(f);
4322 if (ramblock_is_ignored(block) &&
4323 block->mr->addr != addr) {
                            error_report("Mismatched GPAs for block %s "
                                         "%" PRId64 " != %" PRId64,
                                         id, (uint64_t)addr,
                                         (uint64_t)block->mr->addr);
4328 ret = -EINVAL;
4329 }
4330 }
4331 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4332 block->idstr);
4333 } else {
4334 error_report("Unknown ramblock \"%s\", cannot "
4335 "accept migration", id);
4336 ret = -EINVAL;
4337 }
4338
4339 total_ram_bytes -= length;
4340 }
4341 break;
4342
4343 case RAM_SAVE_FLAG_ZERO:
4344 ch = qemu_get_byte(f);
4345 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4346 break;
4347
4348 case RAM_SAVE_FLAG_PAGE:
4349 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4350 break;
4351
4352 case RAM_SAVE_FLAG_COMPRESS_PAGE:
4353 len = qemu_get_be32(f);
4354 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4355 error_report("Invalid compressed data length: %d", len);
4356 ret = -EINVAL;
4357 break;
4358 }
4359 decompress_data_with_multi_threads(f, host, len);
4360 break;
4361
4362 case RAM_SAVE_FLAG_XBZRLE:
4363 if (load_xbzrle(f, addr, host) < 0) {
4364 error_report("Failed to decompress XBZRLE page at "
4365 RAM_ADDR_FMT, addr);
4366 ret = -EINVAL;
4367 break;
4368 }
4369 break;
4370 case RAM_SAVE_FLAG_EOS:
4371
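            /* normal exit */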
4372 multifd_recv_sync_main();
4373 break;
4374 default:
4375 if (flags & RAM_SAVE_FLAG_HOOK) {
4376 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
4377 } else {
4378 error_report("Unknown combination of migration flags: %#x",
4379 flags);
4380 ret = -EINVAL;
4381 }
4382 }
4383 if (!ret) {
4384 ret = qemu_file_get_error(f);
4385 }
4386 }
4387
4388 return ret;
4389}
4390
4391static int ram_load(QEMUFile *f, void *opaque, int version_id)
4392{
4393 int ret = 0;
4394 static uint64_t seq_iter;
4395
4396
4397
4398
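    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */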
4399 bool postcopy_running = postcopy_is_running();
4400
4401 seq_iter++;
4402
4403 if (version_id != 4) {
4404 return -EINVAL;
4405 }
4406
4407
4408
4409
4410
4411
4412
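    /*
     * This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */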
4413 WITH_RCU_READ_LOCK_GUARD() {
4414 if (postcopy_running) {
4415 ret = ram_load_postcopy(f);
4416 } else {
4417 ret = ram_load_precopy(f);
4418 }
4419
4420 ret |= wait_for_decompress_done();
4421 }
4422 trace_ram_load_complete(ret, seq_iter);
4423
4424 if (!ret && migration_incoming_in_colo_state()) {
4425 colo_flush_ram_cache();
4426 }
4427 return ret;
4428}
4429
4430static bool ram_has_postcopy(void *opaque)
4431{
4432 RAMBlock *rb;
4433 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
4434 if (ramblock_is_pmem(rb)) {
            info_report("Block: %s, host: %p is an nvdimm memory, postcopy "
                        "is not supported now!", rb->idstr, rb->host);
4437 return false;
4438 }
4439 }
4440
4441 return migrate_postcopy_ram();
4442}
4443
4444
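/* Sync all the dirty bitmaps with the destination VM. */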
4445static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4446{
4447 RAMBlock *block;
4448 QEMUFile *file = s->to_dst_file;
4449 int ramblock_count = 0;
4450
4451 trace_ram_dirty_bitmap_sync_start();
4452
4453 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4454 qemu_savevm_send_recv_bitmap(file, block->idstr);
4455 trace_ram_dirty_bitmap_request(block->idstr);
4456 ramblock_count++;
4457 }
4458
4459 trace_ram_dirty_bitmap_sync_wait();
4460
4461
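    /* Wait until all the ramblocks' dirty bitmaps are synced */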
4462 while (ramblock_count--) {
4463 qemu_sem_wait(&s->rp_state.rp_sem);
4464 }
4465
4466 trace_ram_dirty_bitmap_sync_complete();
4467
4468 return 0;
4469}
4470
4471static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4472{
4473 qemu_sem_post(&s->rp_state.rp_sem);
4474}
4475
4476
4477
4478
4479
4480
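/*
 * Read the received bitmap, revert it as the initial dirty bitmap.
 * This is only used when the postcopy migration is paused but wants
 * to resume from a middle point.
 */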
4481int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4482{
4483 int ret = -EINVAL;
4484 QEMUFile *file = s->rp_state.from_dst_file;
4485 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
4486 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
4487 uint64_t size, end_mark;
4488
4489 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4490
4491 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4492 error_report("%s: incorrect state %s", __func__,
4493 MigrationStatus_str(s->state));
4494 return -EINVAL;
4495 }
4496
4497
4498
4499
4500
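    /*
     * Note: see comments in ramblock_recv_bitmap_send() on why we
     * need the endianness conversion, and the paddings.
     */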
4501 local_size = ROUND_UP(local_size, 8);
4502
4503
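    /* Add paddings */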
4504 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4505
4506 size = qemu_get_be64(file);
4507
4508
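    /* The size of the bitmap should match with our ramblock */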
4509 if (size != local_size) {
4510 error_report("%s: ramblock '%s' bitmap size mismatch "
4511 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4512 block->idstr, size, local_size);
4513 ret = -EINVAL;
4514 goto out;
4515 }
4516
4517 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4518 end_mark = qemu_get_be64(file);
4519
4520 ret = qemu_file_get_error(file);
4521 if (ret || size != local_size) {
4522 error_report("%s: read bitmap failed for ramblock '%s': %d"
4523 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4524 __func__, block->idstr, ret, local_size, size);
4525 ret = -EIO;
4526 goto out;
4527 }
4528
4529 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                     __func__, block->idstr, end_mark);
4532 ret = -EINVAL;
4533 goto out;
4534 }
4535
4536
4537
4538
4539
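    /*
     * Endianness conversion. We are during postcopy (though paused).
     * The dirty bitmap won't change. We can directly modify it.
     */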
4540 bitmap_from_le(block->bmap, le_bitmap, nbits);
4541
4542
4543
4544
4545
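    /*
     * What we received is "received bitmap". Revert it as the initial
     * dirty bitmap for this ramblock.
     */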
4546 bitmap_complement(block->bmap, block->bmap, nbits);
4547
4548 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4549
4550
4551
4552
4553
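    /*
     * We succeeded to sync the bitmap for the current ramblock. If this is
     * the last one to sync, we need to notify the main send thread.
     */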
4554 ram_dirty_bitmap_reload_notify(s);
4555
4556 ret = 0;
4557out:
4558 g_free(le_bitmap);
4559 return ret;
4560}
4561
4562static int ram_resume_prepare(MigrationState *s, void *opaque)
4563{
4564 RAMState *rs = *(RAMState **)opaque;
4565 int ret;
4566
4567 ret = ram_dirty_bitmap_sync_all(s, rs);
4568 if (ret) {
4569 return ret;
4570 }
4571
4572 ram_state_resume_prepare(rs, s->to_dst_file);
4573
4574 return 0;
4575}
4576
4577static SaveVMHandlers savevm_ram_handlers = {
4578 .save_setup = ram_save_setup,
4579 .save_live_iterate = ram_save_iterate,
4580 .save_live_complete_postcopy = ram_save_complete,
4581 .save_live_complete_precopy = ram_save_complete,
4582 .has_postcopy = ram_has_postcopy,
4583 .save_live_pending = ram_save_pending,
4584 .load_state = ram_load,
4585 .save_cleanup = ram_save_cleanup,
4586 .load_setup = ram_load_setup,
4587 .load_cleanup = ram_load_cleanup,
4588 .resume_prepare = ram_resume_prepare,
4589};
4590
4591void ram_mig_init(void)
4592{
4593 qemu_mutex_init(&XBZRLE.lock);
4594 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
4595}
4596