1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include "qemu/osdep.h"
30#include "cpu.h"
31#include <zlib.h>
32#include "qemu/cutils.h"
33#include "qemu/bitops.h"
34#include "qemu/bitmap.h"
35#include "qemu/main-loop.h"
36#include "qemu/pmem.h"
37#include "xbzrle.h"
38#include "ram.h"
39#include "migration.h"
40#include "socket.h"
41#include "migration/register.h"
42#include "migration/misc.h"
43#include "qemu-file.h"
44#include "postcopy-ram.h"
45#include "page_cache.h"
46#include "qemu/error-report.h"
47#include "qapi/error.h"
48#include "qapi/qapi-events-migration.h"
49#include "qapi/qmp/qerror.h"
50#include "trace.h"
51#include "exec/ram_addr.h"
52#include "exec/target_page.h"
53#include "qemu/rcu_queue.h"
54#include "migration/colo.h"
55#include "block.h"
56#include "sysemu/sysemu.h"
57#include "qemu/uuid.h"
58#include "savevm.h"
59#include "qemu/iov.h"
60
61
62
63
64
65
66
67
68
69
/*
 * Flag bits that travel in the 64-bit page-header word together with the
 * page offset: save_page_header() ORs RAM_SAVE_FLAG_CONTINUE into the offset
 * and its XBZRLE caller ORs in RAM_SAVE_FLAG_XBZRLE before the word is
 * written with qemu_put_be64().
 *
 * NOTE(review): only CONTINUE and XBZRLE are used in the visible part of the
 * file; the meaning of the other bits should be confirmed against their users.
 */
#define RAM_SAVE_FLAG_FULL 0x01
#define RAM_SAVE_FLAG_ZERO 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE 0x08
#define RAM_SAVE_FLAG_EOS 0x10
/* set when the page is in the same block as the previous header (idstr omitted) */
#define RAM_SAVE_FLAG_CONTINUE 0x20
/* set on headers that are followed by an XBZRLE-encoded page */
#define RAM_SAVE_FLAG_XBZRLE 0x40

#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
79
/*
 * Thin wrapper: forwards @p and @size to buffer_is_zero() and returns its
 * verdict unchanged.
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    bool verdict = buffer_is_zero(p, size);

    return verdict;
}
84
/* XBZRLE statistics; cache_miss and overflow are bumped by save_xbzrle_page() */
XBZRLECacheStats xbzrle_counters;

/* File-local state shared by the XBZRLE code paths in this file. */
static struct {
    /* output buffer handed to xbzrle_encode_buffer(); its contents go to the stream */
    uint8_t *encoded_buf;
    /* scratch copy of the page being encoded (filled with memcpy before encoding) */
    uint8_t *current_buf;
    /* page cache used via cache_is_cached()/cache_insert()/get_cached_data() */
    PageCache *cache;
    /* taken by XBZRLE_cache_lock(), released by XBZRLE_cache_unlock() */
    QemuMutex lock;
    /* page that xbzrle_cache_zero_page() inserts into the cache */
    uint8_t *zero_target_page;
    /*
     * Not referenced in the visible part of the file; presumably the
     * load-side decode buffer -- TODO confirm.
     */
    uint8_t *decoded_buf;
} XBZRLE;
102
103static void XBZRLE_cache_lock(void)
104{
105 if (migrate_use_xbzrle())
106 qemu_mutex_lock(&XBZRLE.lock);
107}
108
109static void XBZRLE_cache_unlock(void)
110{
111 if (migrate_use_xbzrle())
112 qemu_mutex_unlock(&XBZRLE.lock);
113}
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128int xbzrle_cache_resize(int64_t new_size, Error **errp)
129{
130 PageCache *new_cache;
131 int64_t ret = 0;
132
133
134 if (new_size != (size_t)new_size) {
135 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
136 "exceeding address space");
137 return -1;
138 }
139
140 if (new_size == migrate_xbzrle_cache_size()) {
141
142 return 0;
143 }
144
145 XBZRLE_cache_lock();
146
147 if (XBZRLE.cache != NULL) {
148 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
149 if (!new_cache) {
150 ret = -1;
151 goto out;
152 }
153
154 cache_fini(XBZRLE.cache);
155 XBZRLE.cache = new_cache;
156 }
157out:
158 XBZRLE_cache_unlock();
159 return ret;
160}
161
162
/*
 * Wraps INTERNAL_RAMBLOCK_FOREACH() (presumably a loop over all RAM blocks)
 * so that the caller's statement runs only for blocks for which
 * qemu_ram_is_migratable() returns true.  The empty "if {} else" form makes
 * the caller's statement the else branch.
 */
#define RAMBLOCK_FOREACH_MIGRATABLE(block) \
    INTERNAL_RAMBLOCK_FOREACH(block) \
    if (!qemu_ram_is_migratable(block)) {} else

/* Drop the RAMBLOCK_FOREACH name so code below cannot use it. */
#undef RAMBLOCK_FOREACH
168
169static void ramblock_recv_map_init(void)
170{
171 RAMBlock *rb;
172
173 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
174 assert(!rb->receivedmap);
175 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
176 }
177}
178
179int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
180{
181 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
182 rb->receivedmap);
183}
184
185bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
186{
187 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
188}
189
190void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
191{
192 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
193}
194
195void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
196 size_t nr)
197{
198 bitmap_set_atomic(rb->receivedmap,
199 ramblock_recv_bitmap_offset(host_addr, rb),
200 nr);
201}
202
203#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
204
205
206
207
208
209
210int64_t ramblock_recv_bitmap_send(QEMUFile *file,
211 const char *block_name)
212{
213 RAMBlock *block = qemu_ram_block_by_name(block_name);
214 unsigned long *le_bitmap, nbits;
215 uint64_t size;
216
217 if (!block) {
218 error_report("%s: invalid block name: %s", __func__, block_name);
219 return -1;
220 }
221
222 nbits = block->used_length >> TARGET_PAGE_BITS;
223
224
225
226
227
228
229 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
230
231
232
233
234
235
236 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
237
238
239 size = DIV_ROUND_UP(nbits, 8);
240
241
242
243
244
245
246
247 size = ROUND_UP(size, 8);
248
249 qemu_put_be64(file, size);
250 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
251
252
253
254
255 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
256 qemu_fflush(file);
257
258 g_free(le_bitmap);
259
260 if (qemu_file_get_error(file)) {
261 return qemu_file_get_error(file);
262 }
263
264 return size + sizeof(size);
265}
266
267
268
269
270
/*
 * One entry of the RAMState.src_page_requests queue.
 *
 * NOTE(review): no code in the visible part of the file fills or consumes
 * these entries; field meanings below are inferred from names and types.
 */
struct RAMSrcPageRequest {
    /* block the request refers to */
    RAMBlock *rb;
    /* presumably the start offset within rb -- TODO confirm */
    hwaddr offset;
    /* presumably the length of the requested range -- TODO confirm */
    hwaddr len;

    /* linkage for the QSIMPLEQ declared in RAMState */
    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
278
279
/*
 * RAM migration state, reached through the file-local ram_state pointer.
 *
 * NOTE(review): most fields are not referenced in the visible part of the
 * file.  Only fields whose use is visible carry a description; the rest
 * should be documented against the code that uses them.
 */
struct RAMState {
    /* stream that page headers and XBZRLE payloads are written to */
    QEMUFile *f;

    RAMBlock *last_seen_block;

    /* block named by the latest page header; see save_page_header() */
    RAMBlock *last_sent_block;

    ram_addr_t last_page;

    uint32_t last_version;

    /* while set, xbzrle_cache_zero_page() does not touch the cache */
    bool ram_bulk_stage;

    int dirty_rate_high_cnt;

    int64_t time_last_bitmap_sync;

    uint64_t bytes_xfer_prev;

    uint64_t num_dirty_pages_period;

    uint64_t xbzrle_cache_miss_prev;

    uint64_t compress_thread_busy_prev;

    uint64_t compressed_size_prev;

    uint64_t compress_pages_prev;

    uint64_t target_page_count_prev;

    uint64_t target_page_count;

    /* page count that ram_bytes_remaining() scales by TARGET_PAGE_SIZE */
    uint64_t migration_dirty_pages;

    QemuMutex bitmap_mutex;

    RAMBlock *last_req_rb;

    QemuMutex src_page_req_mutex;
    /* queue of RAMSrcPageRequest entries */
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

/* NULL-checked by ram_bytes_remaining() before it is dereferenced */
static RAMState *ram_state;
330
331uint64_t ram_bytes_remaining(void)
332{
333 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
334 0;
335}
336
/* Migration statistics; multifd_send_pages() adds to multifd_bytes/transferred */
MigrationStats ram_counters;

/*
 * NOTE(review): not used in the visible part of the file; field meanings are
 * inferred from their names and should be confirmed against the users.
 */
struct PageSearchStatus {
    /* presumably the block currently being searched */
    RAMBlock *block;
    /* presumably the current page index within block */
    unsigned long page;
    /* presumably set once the search has wrapped around all blocks */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
349
CompressionStats compression_counters;

/* Per-thread state of one compression worker (see do_data_compress()). */
struct CompressParam {
    /* set by the worker, under comp_done_lock, when a page has been handled */
    bool done;
    /* set by compress_threads_save_cleanup() to make the worker loop exit */
    bool quit;
    /* result of do_compress_ram_page() for the last page handled */
    bool zero_page;
    /* per-thread QEMUFile opened with empty_ops; passed to do_compress_ram_page() */
    QEMUFile *file;
    /* guards block/offset/quit; the worker sleeps on cond while holding it */
    QemuMutex mutex;
    QemuCond cond;
    /* pending job: non-NULL block means work is queued; the worker resets it to NULL */
    RAMBlock *block;
    ram_addr_t offset;

    /* zlib stream set up with deflateInit() and torn down with deflateEnd() */
    z_stream stream;
    /* TARGET_PAGE_SIZE scratch buffer handed to do_compress_ram_page() */
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;
367
/*
 * Per-thread state of one decompression worker.
 *
 * NOTE(review): the code using this struct is outside the visible part of
 * the file; the field roles are presumed to mirror CompressParam -- confirm.
 */
struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    /* presumably the destination of the decompressed page */
    void *des;
    /* presumably the buffer holding compressed input of length len */
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;
379
/* Arrays of migrate_compress_threads() entries, allocated in compress_threads_save_setup() */
static CompressParam *comp_param;
static QemuThread *compress_threads;

/* comp_done_lock guards CompressParam.done/zero_page; comp_done_cond is signalled per finished page */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;

/* ops table with no callbacks, used for the per-thread compression QEMUFiles */
static const QEMUFileOps empty_ops = { };

/* Decompression-side counterparts; their users are outside the visible part of the file */
static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

/* Defined later in the file; its return value is stored in CompressParam.zero_page */
static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);
399
400static void *do_data_compress(void *opaque)
401{
402 CompressParam *param = opaque;
403 RAMBlock *block;
404 ram_addr_t offset;
405 bool zero_page;
406
407 qemu_mutex_lock(¶m->mutex);
408 while (!param->quit) {
409 if (param->block) {
410 block = param->block;
411 offset = param->offset;
412 param->block = NULL;
413 qemu_mutex_unlock(¶m->mutex);
414
415 zero_page = do_compress_ram_page(param->file, ¶m->stream,
416 block, offset, param->originbuf);
417
418 qemu_mutex_lock(&comp_done_lock);
419 param->done = true;
420 param->zero_page = zero_page;
421 qemu_cond_signal(&comp_done_cond);
422 qemu_mutex_unlock(&comp_done_lock);
423
424 qemu_mutex_lock(¶m->mutex);
425 } else {
426 qemu_cond_wait(¶m->cond, ¶m->mutex);
427 }
428 }
429 qemu_mutex_unlock(¶m->mutex);
430
431 return NULL;
432}
433
434static void compress_threads_save_cleanup(void)
435{
436 int i, thread_count;
437
438 if (!migrate_use_compression() || !comp_param) {
439 return;
440 }
441
442 thread_count = migrate_compress_threads();
443 for (i = 0; i < thread_count; i++) {
444
445
446
447
448 if (!comp_param[i].file) {
449 break;
450 }
451
452 qemu_mutex_lock(&comp_param[i].mutex);
453 comp_param[i].quit = true;
454 qemu_cond_signal(&comp_param[i].cond);
455 qemu_mutex_unlock(&comp_param[i].mutex);
456
457 qemu_thread_join(compress_threads + i);
458 qemu_mutex_destroy(&comp_param[i].mutex);
459 qemu_cond_destroy(&comp_param[i].cond);
460 deflateEnd(&comp_param[i].stream);
461 g_free(comp_param[i].originbuf);
462 qemu_fclose(comp_param[i].file);
463 comp_param[i].file = NULL;
464 }
465 qemu_mutex_destroy(&comp_done_lock);
466 qemu_cond_destroy(&comp_done_cond);
467 g_free(compress_threads);
468 g_free(comp_param);
469 compress_threads = NULL;
470 comp_param = NULL;
471}
472
473static int compress_threads_save_setup(void)
474{
475 int i, thread_count;
476
477 if (!migrate_use_compression()) {
478 return 0;
479 }
480 thread_count = migrate_compress_threads();
481 compress_threads = g_new0(QemuThread, thread_count);
482 comp_param = g_new0(CompressParam, thread_count);
483 qemu_cond_init(&comp_done_cond);
484 qemu_mutex_init(&comp_done_lock);
485 for (i = 0; i < thread_count; i++) {
486 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
487 if (!comp_param[i].originbuf) {
488 goto exit;
489 }
490
491 if (deflateInit(&comp_param[i].stream,
492 migrate_compress_level()) != Z_OK) {
493 g_free(comp_param[i].originbuf);
494 goto exit;
495 }
496
497
498
499
500 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
501 comp_param[i].done = true;
502 comp_param[i].quit = false;
503 qemu_mutex_init(&comp_param[i].mutex);
504 qemu_cond_init(&comp_param[i].cond);
505 qemu_thread_create(compress_threads + i, "compress",
506 do_data_compress, comp_param + i,
507 QEMU_THREAD_JOINABLE);
508 }
509 return 0;
510
511exit:
512 compress_threads_save_cleanup();
513 return -1;
514}
515
516
517
/* Magic and version carried (big-endian) in every multifd init message and packet header */
#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

/* Packet flag: sender and receiver threads post their sem_sync when a packet carries it */
#define MULTIFD_FLAG_SYNC (1 << 0)
522
/* First message on each multifd channel (see multifd_send_initial_packet()). */
typedef struct {
    /* MULTIFD_MAGIC, big-endian on the wire */
    uint32_t magic;
    /* MULTIFD_VERSION, big-endian on the wire */
    uint32_t version;
    /* copy of qemu_uuid; the receiver rejects a mismatch */
    unsigned char uuid[16];
    /* channel id; the receiver uses it to index its params array */
    uint8_t id;
} __attribute__((packed)) MultiFDInit_t;
529
/*
 * Header sent ahead of each batch of pages.  multifd_send_fill_packet()
 * stores every integer field in big-endian form.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    /* MULTIFD_FLAG_* bits */
    uint32_t flags;
    /* sender's migrate_multifd_page_count() */
    uint32_t size;
    /* number of valid entries in offset[] */
    uint32_t used;
    uint64_t packet_num;
    /* idstr of the RAM block all offsets refer to */
    char ramblock[256];
    /* page offsets within that block */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
540
/* A batch of pages from a single RAM block (see multifd_pages_init()). */
typedef struct {
    /* number of entries of offset[]/iov[] currently filled */
    uint32_t used;
    /* capacity of offset[] and iov[] */
    uint32_t allocated;
    /* only reset in multifd_pages_clear() within the visible part of the file */
    uint64_t packet_num;
    /* offset of each page within block */
    ram_addr_t *offset;
    /* one iovec per page, pointing at block->host + offset */
    struct iovec *iov;
    /* block all pages belong to; NULL while the batch is empty */
    RAMBlock *block;
} MultiFDPages_t;
554
/* Per-channel state of one multifd send thread. */
typedef struct {
    /* channel number, assigned in multifd_save_setup() and sent in the init message */
    uint8_t id;
    /* thread name ("multifdsend_%d") */
    char *name;
    /* the thread itself; joined in multifd_save_cleanup() while running is set */
    QemuThread thread;
    /* I/O channel, attached in multifd_new_send_channel_async() */
    QIOChannel *c;
    /* posted to wake the thread for a new job or for quit */
    QemuSemaphore sem;
    /* held while the job fields below are read or changed */
    QemuMutex mutex;
    /* true from just before thread creation until the thread exits */
    bool running;
    /* set by multifd_send_terminate_threads() */
    bool quit;
    /* jobs queued for the thread: bumped by producers, dropped by the thread */
    int pending_job;
    /* batch to send; swapped with the global batch in multifd_send_pages() */
    MultiFDPages_t *pages;
    /* byte size of the packet buffer */
    uint32_t packet_len;
    /* header buffer filled by multifd_send_fill_packet() */
    MultiFDPacket_t *packet;
    /* MULTIFD_FLAG_* for the next packet; cleared after the header is filled */
    uint32_t flags;
    /* global packet number assigned to the next packet */
    uint64_t packet_num;

    /* packets sent by this thread (starts at 1 for the init message) */
    uint64_t num_packets;
    /* pages sent by this thread */
    uint64_t num_pages;
    /* initialised and destroyed, but otherwise unused in the visible part of the file */
    QemuSemaphore sem_sync;
} MultiFDSendParams;
593
/* Per-channel state of one multifd receive thread. */
typedef struct {
    /* channel number, assigned in multifd_load_setup() */
    uint8_t id;
    /* thread name ("multifdrecv_%d") */
    char *name;
    /* the thread itself; joined in multifd_load_cleanup() while running is set */
    QemuThread thread;
    /* I/O channel, attached in multifd_recv_new_channel(); NULL until then */
    QIOChannel *c;
    /* held while a packet header is decoded and the counters are updated */
    QemuMutex mutex;
    /* true from just before thread creation until the thread exits */
    bool running;
    /* iovecs the incoming pages are read into */
    MultiFDPages_t *pages;
    /* byte size of the packet buffer */
    uint32_t packet_len;
    /* header buffer decoded by multifd_recv_unfill_packet() */
    MultiFDPacket_t *packet;
    /* flags of the last packet received */
    uint32_t flags;
    /* packet number of the last packet received */
    uint64_t packet_num;

    /* packets received by this thread (starts at 1 for the init message) */
    uint64_t num_packets;
    /* pages received by this thread */
    uint64_t num_pages;
    /* the thread waits here after a SYNC packet; posted by multifd_recv_sync_main() */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;
626
627static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
628{
629 MultiFDInit_t msg;
630 int ret;
631
632 msg.magic = cpu_to_be32(MULTIFD_MAGIC);
633 msg.version = cpu_to_be32(MULTIFD_VERSION);
634 msg.id = p->id;
635 memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
636
637 ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
638 if (ret != 0) {
639 return -1;
640 }
641 return 0;
642}
643
644static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
645{
646 MultiFDInit_t msg;
647 int ret;
648
649 ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
650 if (ret != 0) {
651 return -1;
652 }
653
654 msg.magic = be32_to_cpu(msg.magic);
655 msg.version = be32_to_cpu(msg.version);
656
657 if (msg.magic != MULTIFD_MAGIC) {
658 error_setg(errp, "multifd: received packet magic %x "
659 "expected %x", msg.magic, MULTIFD_MAGIC);
660 return -1;
661 }
662
663 if (msg.version != MULTIFD_VERSION) {
664 error_setg(errp, "multifd: received packet version %d "
665 "expected %d", msg.version, MULTIFD_VERSION);
666 return -1;
667 }
668
669 if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
670 char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
671 char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
672
673 error_setg(errp, "multifd: received uuid '%s' and expected "
674 "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
675 g_free(uuid);
676 g_free(msg_uuid);
677 return -1;
678 }
679
680 if (msg.id > migrate_multifd_channels()) {
681 error_setg(errp, "multifd: received channel version %d "
682 "expected %d", msg.version, MULTIFD_VERSION);
683 return -1;
684 }
685
686 return msg.id;
687}
688
689static MultiFDPages_t *multifd_pages_init(size_t size)
690{
691 MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
692
693 pages->allocated = size;
694 pages->iov = g_new0(struct iovec, size);
695 pages->offset = g_new0(ram_addr_t, size);
696
697 return pages;
698}
699
700static void multifd_pages_clear(MultiFDPages_t *pages)
701{
702 pages->used = 0;
703 pages->allocated = 0;
704 pages->packet_num = 0;
705 pages->block = NULL;
706 g_free(pages->iov);
707 pages->iov = NULL;
708 g_free(pages->offset);
709 pages->offset = NULL;
710 g_free(pages);
711}
712
713static void multifd_send_fill_packet(MultiFDSendParams *p)
714{
715 MultiFDPacket_t *packet = p->packet;
716 int i;
717
718 packet->magic = cpu_to_be32(MULTIFD_MAGIC);
719 packet->version = cpu_to_be32(MULTIFD_VERSION);
720 packet->flags = cpu_to_be32(p->flags);
721 packet->size = cpu_to_be32(migrate_multifd_page_count());
722 packet->used = cpu_to_be32(p->pages->used);
723 packet->packet_num = cpu_to_be64(p->packet_num);
724
725 if (p->pages->block) {
726 strncpy(packet->ramblock, p->pages->block->idstr, 256);
727 }
728
729 for (i = 0; i < p->pages->used; i++) {
730 packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
731 }
732}
733
734static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
735{
736 MultiFDPacket_t *packet = p->packet;
737 RAMBlock *block;
738 int i;
739
740 packet->magic = be32_to_cpu(packet->magic);
741 if (packet->magic != MULTIFD_MAGIC) {
742 error_setg(errp, "multifd: received packet "
743 "magic %x and expected magic %x",
744 packet->magic, MULTIFD_MAGIC);
745 return -1;
746 }
747
748 packet->version = be32_to_cpu(packet->version);
749 if (packet->version != MULTIFD_VERSION) {
750 error_setg(errp, "multifd: received packet "
751 "version %d and expected version %d",
752 packet->version, MULTIFD_VERSION);
753 return -1;
754 }
755
756 p->flags = be32_to_cpu(packet->flags);
757
758 packet->size = be32_to_cpu(packet->size);
759 if (packet->size > migrate_multifd_page_count()) {
760 error_setg(errp, "multifd: received packet "
761 "with size %d and expected maximum size %d",
762 packet->size, migrate_multifd_page_count()) ;
763 return -1;
764 }
765
766 p->pages->used = be32_to_cpu(packet->used);
767 if (p->pages->used > packet->size) {
768 error_setg(errp, "multifd: received packet "
769 "with size %d and expected maximum size %d",
770 p->pages->used, packet->size) ;
771 return -1;
772 }
773
774 p->packet_num = be64_to_cpu(packet->packet_num);
775
776 if (p->pages->used) {
777
778 packet->ramblock[255] = 0;
779 block = qemu_ram_block_by_name(packet->ramblock);
780 if (!block) {
781 error_setg(errp, "multifd: unknown ram block %s",
782 packet->ramblock);
783 return -1;
784 }
785 }
786
787 for (i = 0; i < p->pages->used; i++) {
788 ram_addr_t offset = be64_to_cpu(packet->offset[i]);
789
790 if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
791 error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
792 " (max " RAM_ADDR_FMT ")",
793 offset, block->max_length);
794 return -1;
795 }
796 p->pages->iov[i].iov_base = block->host + offset;
797 p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
798 }
799
800 return 0;
801}
802
/* Global multifd send-side state, allocated in multifd_save_setup(). */
struct {
    /* one entry per channel */
    MultiFDSendParams *params;
    /* channels whose send thread has been created (updated with atomic_inc) */
    int count;
    /* batch being filled by multifd_queue_page() */
    MultiFDPages_t *pages;
    /* posted by a send thread after a SYNC packet; awaited in multifd_send_sync_main() */
    QemuSemaphore sem_sync;
    /* next global packet number */
    uint64_t packet_num;
    /* posted by a send thread after each finished job; awaited in multifd_send_pages() */
    QemuSemaphore channels_ready;
} *multifd_send_state;
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835static void multifd_send_pages(void)
836{
837 int i;
838 static int next_channel;
839 MultiFDSendParams *p = NULL;
840 MultiFDPages_t *pages = multifd_send_state->pages;
841 uint64_t transferred;
842
843 qemu_sem_wait(&multifd_send_state->channels_ready);
844 for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
845 p = &multifd_send_state->params[i];
846
847 qemu_mutex_lock(&p->mutex);
848 if (!p->pending_job) {
849 p->pending_job++;
850 next_channel = (i + 1) % migrate_multifd_channels();
851 break;
852 }
853 qemu_mutex_unlock(&p->mutex);
854 }
855 p->pages->used = 0;
856
857 p->packet_num = multifd_send_state->packet_num++;
858 p->pages->block = NULL;
859 multifd_send_state->pages = p->pages;
860 p->pages = pages;
861 transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
862 ram_counters.multifd_bytes += transferred;
863 ram_counters.transferred += transferred;;
864 qemu_mutex_unlock(&p->mutex);
865 qemu_sem_post(&p->sem);
866}
867
/*
 * Queue the page at @offset of @block on the global batch.  The batch is
 * handed to a channel through multifd_send_pages() when it is full, or when
 * @block differs from the block the batch was started with.
 */
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    /* snapshot: multifd_send_pages() replaces multifd_send_state->pages */
    MultiFDPages_t *pages = multifd_send_state->pages;

    /* an empty batch adopts the block of the first page queued on it */
    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        /* still room in the batch: nothing to send yet */
        if (pages->used < pages->allocated) {
            return;
        }
    }

    /* batch is full, or the page belongs to a different block: flush it */
    multifd_send_pages();

    /*
     * @pages still refers to the batch just handed off.  If it was for
     * another block, the page was not added above, so queue it on the fresh
     * global batch.
     */
    if (pages->block != block) {
        multifd_queue_page(block, offset);
    }
}
893
894static void multifd_send_terminate_threads(Error *err)
895{
896 int i;
897
898 if (err) {
899 MigrationState *s = migrate_get_current();
900 migrate_set_error(s, err);
901 if (s->state == MIGRATION_STATUS_SETUP ||
902 s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
903 s->state == MIGRATION_STATUS_DEVICE ||
904 s->state == MIGRATION_STATUS_ACTIVE) {
905 migrate_set_state(&s->state, s->state,
906 MIGRATION_STATUS_FAILED);
907 }
908 }
909
910 for (i = 0; i < migrate_multifd_channels(); i++) {
911 MultiFDSendParams *p = &multifd_send_state->params[i];
912
913 qemu_mutex_lock(&p->mutex);
914 p->quit = true;
915 qemu_sem_post(&p->sem);
916 qemu_mutex_unlock(&p->mutex);
917 }
918}
919
920int multifd_save_cleanup(Error **errp)
921{
922 int i;
923 int ret = 0;
924
925 if (!migrate_use_multifd()) {
926 return 0;
927 }
928 multifd_send_terminate_threads(NULL);
929 for (i = 0; i < migrate_multifd_channels(); i++) {
930 MultiFDSendParams *p = &multifd_send_state->params[i];
931
932 if (p->running) {
933 qemu_thread_join(&p->thread);
934 }
935 socket_send_channel_destroy(p->c);
936 p->c = NULL;
937 qemu_mutex_destroy(&p->mutex);
938 qemu_sem_destroy(&p->sem);
939 qemu_sem_destroy(&p->sem_sync);
940 g_free(p->name);
941 p->name = NULL;
942 multifd_pages_clear(p->pages);
943 p->pages = NULL;
944 p->packet_len = 0;
945 g_free(p->packet);
946 p->packet = NULL;
947 }
948 qemu_sem_destroy(&multifd_send_state->channels_ready);
949 qemu_sem_destroy(&multifd_send_state->sem_sync);
950 g_free(multifd_send_state->params);
951 multifd_send_state->params = NULL;
952 multifd_pages_clear(multifd_send_state->pages);
953 multifd_send_state->pages = NULL;
954 g_free(multifd_send_state);
955 multifd_send_state = NULL;
956 return ret;
957}
958
959static void multifd_send_sync_main(void)
960{
961 int i;
962
963 if (!migrate_use_multifd()) {
964 return;
965 }
966 if (multifd_send_state->pages->used) {
967 multifd_send_pages();
968 }
969 for (i = 0; i < migrate_multifd_channels(); i++) {
970 MultiFDSendParams *p = &multifd_send_state->params[i];
971
972 trace_multifd_send_sync_main_signal(p->id);
973
974 qemu_mutex_lock(&p->mutex);
975
976 p->packet_num = multifd_send_state->packet_num++;
977 p->flags |= MULTIFD_FLAG_SYNC;
978 p->pending_job++;
979 qemu_mutex_unlock(&p->mutex);
980 qemu_sem_post(&p->sem);
981 }
982 for (i = 0; i < migrate_multifd_channels(); i++) {
983 MultiFDSendParams *p = &multifd_send_state->params[i];
984
985 trace_multifd_send_sync_main_wait(p->id);
986 qemu_sem_wait(&multifd_send_state->sem_sync);
987 }
988 trace_multifd_send_sync_main(multifd_send_state->packet_num);
989}
990
/*
 * Body of one multifd send thread.
 *
 * @opaque: the MultiFDSendParams of this channel
 *
 * Sends the init message, then loops: wait on p->sem, and when a job is
 * pending write the packet header followed by the page data.  The loop ends
 * on a write error or once p->quit is seen with no job pending.  A pending
 * error is reported through multifd_send_terminate_threads().
 *
 * Always returns NULL.
 */
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }
    /* the init message counts as the first packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            /* snapshot the job under the lock; the I/O below runs unlocked */
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            uint32_t flags = p->flags;

            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags);

            /* header first ... */
            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            /* ... then the pages themselves, straight from guest RAM */
            ret = qio_channel_writev_all(p->c, p->pages->iov, used, &local_err);
            if (ret != 0) {
                break;
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            /* acknowledge a sync request to multifd_send_sync_main() */
            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&multifd_send_state->sem_sync);
            }
            /* tell multifd_send_pages() that a channel is free again */
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            /* woken with no job and no quit request: just wait again */
            qemu_mutex_unlock(&p->mutex);

        }
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}
1066
1067static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
1068{
1069 MultiFDSendParams *p = opaque;
1070 QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
1071 Error *local_err = NULL;
1072
1073 if (qio_task_propagate_error(task, &local_err)) {
1074 if (multifd_save_cleanup(&local_err) != 0) {
1075 migrate_set_error(migrate_get_current(), local_err);
1076 }
1077 } else {
1078 p->c = QIO_CHANNEL(sioc);
1079 qio_channel_set_delay(p->c, false);
1080 p->running = true;
1081 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
1082 QEMU_THREAD_JOINABLE);
1083
1084 atomic_inc(&multifd_send_state->count);
1085 }
1086}
1087
1088int multifd_save_setup(void)
1089{
1090 int thread_count;
1091 uint32_t page_count = migrate_multifd_page_count();
1092 uint8_t i;
1093
1094 if (!migrate_use_multifd()) {
1095 return 0;
1096 }
1097 thread_count = migrate_multifd_channels();
1098 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
1099 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
1100 atomic_set(&multifd_send_state->count, 0);
1101 multifd_send_state->pages = multifd_pages_init(page_count);
1102 qemu_sem_init(&multifd_send_state->sem_sync, 0);
1103 qemu_sem_init(&multifd_send_state->channels_ready, 0);
1104
1105 for (i = 0; i < thread_count; i++) {
1106 MultiFDSendParams *p = &multifd_send_state->params[i];
1107
1108 qemu_mutex_init(&p->mutex);
1109 qemu_sem_init(&p->sem, 0);
1110 qemu_sem_init(&p->sem_sync, 0);
1111 p->quit = false;
1112 p->pending_job = 0;
1113 p->id = i;
1114 p->pages = multifd_pages_init(page_count);
1115 p->packet_len = sizeof(MultiFDPacket_t)
1116 + sizeof(ram_addr_t) * page_count;
1117 p->packet = g_malloc0(p->packet_len);
1118 p->name = g_strdup_printf("multifdsend_%d", i);
1119 socket_send_channel_create(multifd_new_send_channel_async, p);
1120 }
1121 return 0;
1122}
1123
/* Global multifd receive-side state, allocated in multifd_load_setup(). */
struct {
    /* one entry per channel */
    MultiFDRecvParams *params;
    /* channels attached so far (updated with atomic_inc) */
    int count;
    /* posted by a receive thread after a SYNC packet; awaited in multifd_recv_sync_main() */
    QemuSemaphore sem_sync;
    /* highest packet number seen by multifd_recv_sync_main() */
    uint64_t packet_num;
} *multifd_recv_state;
1133
1134static void multifd_recv_terminate_threads(Error *err)
1135{
1136 int i;
1137
1138 if (err) {
1139 MigrationState *s = migrate_get_current();
1140 migrate_set_error(s, err);
1141 if (s->state == MIGRATION_STATUS_SETUP ||
1142 s->state == MIGRATION_STATUS_ACTIVE) {
1143 migrate_set_state(&s->state, s->state,
1144 MIGRATION_STATUS_FAILED);
1145 }
1146 }
1147
1148 for (i = 0; i < migrate_multifd_channels(); i++) {
1149 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1150
1151 qemu_mutex_lock(&p->mutex);
1152
1153
1154
1155
1156 qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
1157 qemu_mutex_unlock(&p->mutex);
1158 }
1159}
1160
1161int multifd_load_cleanup(Error **errp)
1162{
1163 int i;
1164 int ret = 0;
1165
1166 if (!migrate_use_multifd()) {
1167 return 0;
1168 }
1169 multifd_recv_terminate_threads(NULL);
1170 for (i = 0; i < migrate_multifd_channels(); i++) {
1171 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1172
1173 if (p->running) {
1174 qemu_thread_join(&p->thread);
1175 }
1176 object_unref(OBJECT(p->c));
1177 p->c = NULL;
1178 qemu_mutex_destroy(&p->mutex);
1179 qemu_sem_destroy(&p->sem_sync);
1180 g_free(p->name);
1181 p->name = NULL;
1182 multifd_pages_clear(p->pages);
1183 p->pages = NULL;
1184 p->packet_len = 0;
1185 g_free(p->packet);
1186 p->packet = NULL;
1187 }
1188 qemu_sem_destroy(&multifd_recv_state->sem_sync);
1189 g_free(multifd_recv_state->params);
1190 multifd_recv_state->params = NULL;
1191 g_free(multifd_recv_state);
1192 multifd_recv_state = NULL;
1193
1194 return ret;
1195}
1196
1197static void multifd_recv_sync_main(void)
1198{
1199 int i;
1200
1201 if (!migrate_use_multifd()) {
1202 return;
1203 }
1204 for (i = 0; i < migrate_multifd_channels(); i++) {
1205 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1206
1207 trace_multifd_recv_sync_main_wait(p->id);
1208 qemu_sem_wait(&multifd_recv_state->sem_sync);
1209 qemu_mutex_lock(&p->mutex);
1210 if (multifd_recv_state->packet_num < p->packet_num) {
1211 multifd_recv_state->packet_num = p->packet_num;
1212 }
1213 qemu_mutex_unlock(&p->mutex);
1214 }
1215 for (i = 0; i < migrate_multifd_channels(); i++) {
1216 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1217
1218 trace_multifd_recv_sync_main_signal(p->id);
1219 qemu_sem_post(&p->sem_sync);
1220 }
1221 trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
1222}
1223
/*
 * Body of one multifd receive thread.
 *
 * @opaque: the MultiFDRecvParams of this channel
 *
 * Loops reading a packet header and then the page data it describes.  The
 * loop ends when qio_channel_read_all_eof() returns 0 or -1, when the
 * header fails validation, or when reading the page data fails.  A pending
 * error is reported through multifd_recv_terminate_threads().
 *
 * Always returns NULL.
 */
static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_recv_thread_start(p->id);
    rcu_register_thread();

    while (true) {
        uint32_t used;
        uint32_t flags;

        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                       p->packet_len, &local_err);
        /* 0 and -1 both end the loop; local_err, if set, is handled below */
        if (ret == 0) {
            break;
        }
        if (ret == -1) {
            break;
        }

        /* decode and validate the header under the channel lock */
        qemu_mutex_lock(&p->mutex);
        ret = multifd_recv_unfill_packet(p, &local_err);
        if (ret) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }

        used = p->pages->used;
        flags = p->flags;
        trace_multifd_recv(p->id, p->packet_num, used, flags);
        p->num_packets++;
        p->num_pages += used;
        qemu_mutex_unlock(&p->mutex);

        /* read the pages into the iovecs prepared by the unfill step */
        ret = qio_channel_readv_all(p->c, p->pages->iov, used, &local_err);
        if (ret != 0) {
            break;
        }

        /* on SYNC: notify multifd_recv_sync_main() and wait to be released */
        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_recv_state->sem_sync);
            qemu_sem_wait(&p->sem_sync);
        }
    }

    if (local_err) {
        multifd_recv_terminate_threads(local_err);
    }
    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}
1283
1284int multifd_load_setup(void)
1285{
1286 int thread_count;
1287 uint32_t page_count = migrate_multifd_page_count();
1288 uint8_t i;
1289
1290 if (!migrate_use_multifd()) {
1291 return 0;
1292 }
1293 thread_count = migrate_multifd_channels();
1294 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1295 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
1296 atomic_set(&multifd_recv_state->count, 0);
1297 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
1298
1299 for (i = 0; i < thread_count; i++) {
1300 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1301
1302 qemu_mutex_init(&p->mutex);
1303 qemu_sem_init(&p->sem_sync, 0);
1304 p->id = i;
1305 p->pages = multifd_pages_init(page_count);
1306 p->packet_len = sizeof(MultiFDPacket_t)
1307 + sizeof(ram_addr_t) * page_count;
1308 p->packet = g_malloc0(p->packet_len);
1309 p->name = g_strdup_printf("multifdrecv_%d", i);
1310 }
1311 return 0;
1312}
1313
1314bool multifd_recv_all_channels_created(void)
1315{
1316 int thread_count = migrate_multifd_channels();
1317
1318 if (!migrate_use_multifd()) {
1319 return true;
1320 }
1321
1322 return thread_count == atomic_read(&multifd_recv_state->count);
1323}
1324
1325
1326bool multifd_recv_new_channel(QIOChannel *ioc)
1327{
1328 MultiFDRecvParams *p;
1329 Error *local_err = NULL;
1330 int id;
1331
1332 id = multifd_recv_initial_packet(ioc, &local_err);
1333 if (id < 0) {
1334 multifd_recv_terminate_threads(local_err);
1335 return false;
1336 }
1337
1338 p = &multifd_recv_state->params[id];
1339 if (p->c != NULL) {
1340 error_setg(&local_err, "multifd: received id '%d' already setup'",
1341 id);
1342 multifd_recv_terminate_threads(local_err);
1343 return false;
1344 }
1345 p->c = ioc;
1346 object_ref(OBJECT(ioc));
1347
1348 p->num_packets = 1;
1349
1350 p->running = true;
1351 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1352 QEMU_THREAD_JOINABLE);
1353 atomic_inc(&multifd_recv_state->count);
1354 return multifd_recv_state->count == migrate_multifd_channels();
1355}
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1370 ram_addr_t offset)
1371{
1372 size_t size, len;
1373
1374 if (block == rs->last_sent_block) {
1375 offset |= RAM_SAVE_FLAG_CONTINUE;
1376 }
1377 qemu_put_be64(f, offset);
1378 size = 8;
1379
1380 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
1381 len = strlen(block->idstr);
1382 qemu_put_byte(f, len);
1383 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
1384 size += 1 + len;
1385 rs->last_sent_block = block;
1386 }
1387 return size;
1388}
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399static void mig_throttle_guest_down(void)
1400{
1401 MigrationState *s = migrate_get_current();
1402 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1403 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
1404 int pct_max = s->parameters.max_cpu_throttle;
1405
1406
1407 if (!cpu_throttle_active()) {
1408 cpu_throttle_set(pct_initial);
1409 } else {
1410
1411 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
1412 pct_max));
1413 }
1414}
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
1429{
1430 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1431 return;
1432 }
1433
1434
1435
1436 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
1437 ram_counters.dirty_sync_count);
1438}
1439
1440#define ENCODING_FLAG_XBZRLE 0x1
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
1457 ram_addr_t current_addr, RAMBlock *block,
1458 ram_addr_t offset, bool last_stage)
1459{
1460 int encoded_len = 0, bytes_xbzrle;
1461 uint8_t *prev_cached_page;
1462
1463 if (!cache_is_cached(XBZRLE.cache, current_addr,
1464 ram_counters.dirty_sync_count)) {
1465 xbzrle_counters.cache_miss++;
1466 if (!last_stage) {
1467 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
1468 ram_counters.dirty_sync_count) == -1) {
1469 return -1;
1470 } else {
1471
1472
1473 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1474 }
1475 }
1476 return -1;
1477 }
1478
1479 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1480
1481
1482 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1483
1484
1485 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1486 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1487 TARGET_PAGE_SIZE);
1488 if (encoded_len == 0) {
1489 trace_save_xbzrle_page_skipping();
1490 return 0;
1491 } else if (encoded_len == -1) {
1492 trace_save_xbzrle_page_overflow();
1493 xbzrle_counters.overflow++;
1494
1495 if (!last_stage) {
1496 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1497 *current_data = prev_cached_page;
1498 }
1499 return -1;
1500 }
1501
1502
1503 if (!last_stage) {
1504 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1505 }
1506
1507
1508 bytes_xbzrle = save_page_header(rs, rs->f, block,
1509 offset | RAM_SAVE_FLAG_XBZRLE);
1510 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1511 qemu_put_be16(rs->f, encoded_len);
1512 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
1513 bytes_xbzrle += encoded_len + 1 + 2;
1514 xbzrle_counters.pages++;
1515 xbzrle_counters.bytes += bytes_xbzrle;
1516 ram_counters.transferred += bytes_xbzrle;
1517
1518 return 1;
1519}
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532static inline
1533unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
1534 unsigned long start)
1535{
1536 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1537 unsigned long *bitmap = rb->bmap;
1538 unsigned long next;
1539
1540 if (!qemu_ram_is_migratable(rb)) {
1541 return size;
1542 }
1543
1544 if (rs->ram_bulk_stage && start > 0) {
1545 next = start + 1;
1546 } else {
1547 next = find_next_bit(bitmap, size, start);
1548 }
1549
1550 return next;
1551}
1552
1553static inline bool migration_bitmap_clear_dirty(RAMState *rs,
1554 RAMBlock *rb,
1555 unsigned long page)
1556{
1557 bool ret;
1558
1559 ret = test_and_clear_bit(page, rb->bmap);
1560
1561 if (ret) {
1562 rs->migration_dirty_pages--;
1563 }
1564 return ret;
1565}
1566
/*
 * Sync the dirty bitmap of @rb for the range [@start, @start + @length)
 * via cpu_physical_memory_sync_dirty_bitmap() and add its return value to
 * rs->migration_dirty_pages.  The helper is also given a pointer to
 * rs->num_dirty_pages_period (presumably to accumulate the per-period
 * dirty count -- confirm against the helper's definition).
 */
static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                        ram_addr_t start, ram_addr_t length)
{
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
                                              &rs->num_dirty_pages_period);
}
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584uint64_t ram_pagesize_summary(void)
1585{
1586 RAMBlock *block;
1587 uint64_t summary = 0;
1588
1589 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1590 summary |= block->page_size;
1591 }
1592
1593 return summary;
1594}
1595
1596static void migration_update_rates(RAMState *rs, int64_t end_time)
1597{
1598 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
1599 double compressed_size;
1600
1601
1602 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1603 / (end_time - rs->time_last_bitmap_sync);
1604
1605 if (!page_count) {
1606 return;
1607 }
1608
1609 if (migrate_use_xbzrle()) {
1610 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
1611 rs->xbzrle_cache_miss_prev) / page_count;
1612 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1613 }
1614
1615 if (migrate_use_compression()) {
1616 compression_counters.busy_rate = (double)(compression_counters.busy -
1617 rs->compress_thread_busy_prev) / page_count;
1618 rs->compress_thread_busy_prev = compression_counters.busy;
1619
1620 compressed_size = compression_counters.compressed_size -
1621 rs->compressed_size_prev;
1622 if (compressed_size) {
1623 double uncompressed_size = (compression_counters.pages -
1624 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1625
1626
1627 compression_counters.compression_rate =
1628 uncompressed_size / compressed_size;
1629
1630 rs->compress_pages_prev = compression_counters.pages;
1631 rs->compressed_size_prev = compression_counters.compressed_size;
1632 }
1633 }
1634}
1635
1636static void migration_bitmap_sync(RAMState *rs)
1637{
1638 RAMBlock *block;
1639 int64_t end_time;
1640 uint64_t bytes_xfer_now;
1641
1642 ram_counters.dirty_sync_count++;
1643
1644 if (!rs->time_last_bitmap_sync) {
1645 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1646 }
1647
1648 trace_migration_bitmap_sync_start();
1649 memory_global_dirty_log_sync();
1650
1651 qemu_mutex_lock(&rs->bitmap_mutex);
1652 rcu_read_lock();
1653 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1654 migration_bitmap_sync_range(rs, block, 0, block->used_length);
1655 }
1656 ram_counters.remaining = ram_bytes_remaining();
1657 rcu_read_unlock();
1658 qemu_mutex_unlock(&rs->bitmap_mutex);
1659
1660 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1661
1662 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1663
1664
1665 if (end_time > rs->time_last_bitmap_sync + 1000) {
1666 bytes_xfer_now = ram_counters.transferred;
1667
1668
1669
1670
1671 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
1672
1673
1674
1675
1676
1677
1678 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
1679 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
1680 (++rs->dirty_rate_high_cnt >= 2)) {
1681 trace_migration_throttle();
1682 rs->dirty_rate_high_cnt = 0;
1683 mig_throttle_guest_down();
1684 }
1685 }
1686
1687 migration_update_rates(rs, end_time);
1688
1689 rs->target_page_count_prev = rs->target_page_count;
1690
1691
1692 rs->time_last_bitmap_sync = end_time;
1693 rs->num_dirty_pages_period = 0;
1694 rs->bytes_xfer_prev = bytes_xfer_now;
1695 }
1696 if (migrate_use_events()) {
1697 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
1698 }
1699}
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1713 RAMBlock *block, ram_addr_t offset)
1714{
1715 uint8_t *p = block->host + offset;
1716 int len = 0;
1717
1718 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1719 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1720 qemu_put_byte(file, 0);
1721 len += 1;
1722 }
1723 return len;
1724}
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1736{
1737 int len = save_zero_page_to_file(rs, rs->f, block, offset);
1738
1739 if (len) {
1740 ram_counters.duplicate++;
1741 ram_counters.transferred += len;
1742 return 1;
1743 }
1744 return -1;
1745}
1746
1747static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
1748{
1749 if (!migrate_release_ram() || !migration_in_postcopy()) {
1750 return;
1751 }
1752
1753 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
1754}
1755
1756
1757
1758
1759
1760
1761
1762
1763static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1764 int *pages)
1765{
1766 uint64_t bytes_xmit = 0;
1767 int ret;
1768
1769 *pages = -1;
1770 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1771 &bytes_xmit);
1772 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1773 return false;
1774 }
1775
1776 if (bytes_xmit) {
1777 ram_counters.transferred += bytes_xmit;
1778 *pages = 1;
1779 }
1780
1781 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1782 return true;
1783 }
1784
1785 if (bytes_xmit > 0) {
1786 ram_counters.normal++;
1787 } else if (bytes_xmit == 0) {
1788 ram_counters.duplicate++;
1789 }
1790
1791 return true;
1792}
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1806 uint8_t *buf, bool async)
1807{
1808 ram_counters.transferred += save_page_header(rs, rs->f, block,
1809 offset | RAM_SAVE_FLAG_PAGE);
1810 if (async) {
1811 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1812 migrate_release_ram() &
1813 migration_in_postcopy());
1814 } else {
1815 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1816 }
1817 ram_counters.transferred += TARGET_PAGE_SIZE;
1818 ram_counters.normal++;
1819 return 1;
1820}
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1836{
1837 int pages = -1;
1838 uint8_t *p;
1839 bool send_async = true;
1840 RAMBlock *block = pss->block;
1841 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1842 ram_addr_t current_addr = block->offset + offset;
1843
1844 p = block->host + offset;
1845 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
1846
1847 XBZRLE_cache_lock();
1848 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1849 migrate_use_xbzrle()) {
1850 pages = save_xbzrle_page(rs, &p, current_addr, block,
1851 offset, last_stage);
1852 if (!last_stage) {
1853
1854
1855
1856 send_async = false;
1857 }
1858 }
1859
1860
1861 if (pages == -1) {
1862 pages = save_normal_page(rs, block, offset, p, send_async);
1863 }
1864
1865 XBZRLE_cache_unlock();
1866
1867 return pages;
1868}
1869
/*
 * Queue the page at @offset in @block on the multifd channels via
 * multifd_queue_page() and count it as a normal page.
 *
 * Always returns 1.  @rs is not used.
 */
static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                 ram_addr_t offset)
{
    multifd_queue_page(block, offset);
    ram_counters.normal++;

    return 1;
}
1878
1879static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1880 ram_addr_t offset, uint8_t *source_buf)
1881{
1882 RAMState *rs = ram_state;
1883 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
1884 bool zero_page = false;
1885 int ret;
1886
1887 if (save_zero_page_to_file(rs, f, block, offset)) {
1888 zero_page = true;
1889 goto exit;
1890 }
1891
1892 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
1893
1894
1895
1896
1897
1898
1899 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1900 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1901 if (ret < 0) {
1902 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
1903 error_report("compressed data failed!");
1904 return false;
1905 }
1906
1907exit:
1908 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1909 return zero_page;
1910}
1911
1912static void
1913update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1914{
1915 ram_counters.transferred += bytes_xmit;
1916
1917 if (param->zero_page) {
1918 ram_counters.duplicate++;
1919 return;
1920 }
1921
1922
1923 compression_counters.compressed_size += bytes_xmit - 8;
1924 compression_counters.pages++;
1925}
1926
1927static bool save_page_use_compression(RAMState *rs);
1928
1929static void flush_compressed_data(RAMState *rs)
1930{
1931 int idx, len, thread_count;
1932
1933 if (!save_page_use_compression(rs)) {
1934 return;
1935 }
1936 thread_count = migrate_compress_threads();
1937
1938 qemu_mutex_lock(&comp_done_lock);
1939 for (idx = 0; idx < thread_count; idx++) {
1940 while (!comp_param[idx].done) {
1941 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1942 }
1943 }
1944 qemu_mutex_unlock(&comp_done_lock);
1945
1946 for (idx = 0; idx < thread_count; idx++) {
1947 qemu_mutex_lock(&comp_param[idx].mutex);
1948 if (!comp_param[idx].quit) {
1949 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1950
1951
1952
1953
1954
1955 update_compress_thread_counts(&comp_param[idx], len);
1956 }
1957 qemu_mutex_unlock(&comp_param[idx].mutex);
1958 }
1959}
1960
/* Record in @param which page (@block, @offset) is to be compressed next. */
static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}
1967
/*
 * Hand the page (@block, @offset) to an idle compression thread.
 *
 * Scans comp_param[] under comp_done_lock for a slot whose 'done' flag is
 * set.  When one is found, the slot's buffered output is first moved into
 * rs->f, then the slot is given the new page and its thread is signalled.
 *
 * Returns 1 if the page was queued.  Returns -1 if no slot was idle and
 * migrate_compress_wait_thread() is false; when it is true the function
 * waits on comp_done_cond and retries until a slot frees up.
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            /* Flush what this thread produced for its previous page. */
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /* No idle thread: either wait for one or give up, per configuration. */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
2015{
2016 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
2017 if (pss->complete_round && pss->block == rs->last_seen_block &&
2018 pss->page >= rs->last_page) {
2019
2020
2021
2022
2023 *again = false;
2024 return false;
2025 }
2026 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
2027
2028 pss->page = 0;
2029 pss->block = QLIST_NEXT_RCU(pss->block, next);
2030 if (!pss->block) {
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040 flush_compressed_data(rs);
2041
2042
2043 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2044
2045 pss->complete_round = true;
2046 rs->ram_bulk_stage = false;
2047 }
2048
2049 *again = true;
2050 return false;
2051 } else {
2052
2053 *again = true;
2054
2055 return true;
2056 }
2057}
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
2070{
2071 RAMBlock *block = NULL;
2072
2073 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2074 return NULL;
2075 }
2076
2077 qemu_mutex_lock(&rs->src_page_req_mutex);
2078 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2079 struct RAMSrcPageRequest *entry =
2080 QSIMPLEQ_FIRST(&rs->src_page_requests);
2081 block = entry->rb;
2082 *offset = entry->offset;
2083
2084 if (entry->len > TARGET_PAGE_SIZE) {
2085 entry->len -= TARGET_PAGE_SIZE;
2086 entry->offset += TARGET_PAGE_SIZE;
2087 } else {
2088 memory_region_unref(block->mr);
2089 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
2090 g_free(entry);
2091 migration_consume_urgent_request();
2092 }
2093 }
2094 qemu_mutex_unlock(&rs->src_page_req_mutex);
2095
2096 return block;
2097}
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
2110{
2111 RAMBlock *block;
2112 ram_addr_t offset;
2113 bool dirty;
2114
2115 do {
2116 block = unqueue_page(rs, &offset);
2117
2118
2119
2120
2121
2122
2123 if (block) {
2124 unsigned long page;
2125
2126 page = offset >> TARGET_PAGE_BITS;
2127 dirty = test_bit(page, block->bmap);
2128 if (!dirty) {
2129 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
2130 page, test_bit(page, block->unsentmap));
2131 } else {
2132 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
2133 }
2134 }
2135
2136 } while (block && !dirty);
2137
2138 if (block) {
2139
2140
2141
2142
2143
2144
2145 rs->ram_bulk_stage = false;
2146
2147
2148
2149
2150
2151
2152 pss->block = block;
2153 pss->page = offset >> TARGET_PAGE_BITS;
2154 }
2155
2156 return !!block;
2157}
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167static void migration_page_queue_free(RAMState *rs)
2168{
2169 struct RAMSrcPageRequest *mspr, *next_mspr;
2170
2171
2172
2173 rcu_read_lock();
2174 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
2175 memory_region_unref(mspr->rb->mr);
2176 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
2177 g_free(mspr);
2178 }
2179 rcu_read_unlock();
2180}
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
2195{
2196 RAMBlock *ramblock;
2197 RAMState *rs = ram_state;
2198
2199 ram_counters.postcopy_requests++;
2200 rcu_read_lock();
2201 if (!rbname) {
2202
2203 ramblock = rs->last_req_rb;
2204
2205 if (!ramblock) {
2206
2207
2208
2209
2210 error_report("ram_save_queue_pages no previous block");
2211 goto err;
2212 }
2213 } else {
2214 ramblock = qemu_ram_block_by_name(rbname);
2215
2216 if (!ramblock) {
2217
2218 error_report("ram_save_queue_pages no block '%s'", rbname);
2219 goto err;
2220 }
2221 rs->last_req_rb = ramblock;
2222 }
2223 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2224 if (start+len > ramblock->used_length) {
2225 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2226 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
2227 __func__, start, len, ramblock->used_length);
2228 goto err;
2229 }
2230
2231 struct RAMSrcPageRequest *new_entry =
2232 g_malloc0(sizeof(struct RAMSrcPageRequest));
2233 new_entry->rb = ramblock;
2234 new_entry->offset = start;
2235 new_entry->len = len;
2236
2237 memory_region_ref(ramblock->mr);
2238 qemu_mutex_lock(&rs->src_page_req_mutex);
2239 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
2240 migration_make_urgent_request();
2241 qemu_mutex_unlock(&rs->src_page_req_mutex);
2242 rcu_read_unlock();
2243
2244 return 0;
2245
2246err:
2247 rcu_read_unlock();
2248 return -1;
2249}
2250
2251static bool save_page_use_compression(RAMState *rs)
2252{
2253 if (!migrate_use_compression()) {
2254 return false;
2255 }
2256
2257
2258
2259
2260
2261
2262 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2263 return true;
2264 }
2265
2266 return false;
2267}
2268
2269
2270
2271
2272
2273
2274static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2275{
2276 if (!save_page_use_compression(rs)) {
2277 return false;
2278 }
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290 if (block != rs->last_sent_block) {
2291 flush_compressed_data(rs);
2292 return false;
2293 }
2294
2295 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2296 return true;
2297 }
2298
2299 compression_counters.busy++;
2300 return false;
2301}
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
2313 bool last_stage)
2314{
2315 RAMBlock *block = pss->block;
2316 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2317 int res;
2318
2319 if (control_save_page(rs, block, offset, &res)) {
2320 return res;
2321 }
2322
2323 if (save_compress_page(rs, block, offset)) {
2324 return 1;
2325 }
2326
2327 res = save_zero_page(rs, block, offset);
2328 if (res > 0) {
2329
2330
2331
2332 if (!save_page_use_compression(rs)) {
2333 XBZRLE_cache_lock();
2334 xbzrle_cache_zero_page(rs, block->offset + offset);
2335 XBZRLE_cache_unlock();
2336 }
2337 ram_release_pages(block->idstr, offset, res);
2338 return res;
2339 }
2340
2341
2342
2343
2344
2345 if (!save_page_use_compression(rs) && migrate_use_multifd()) {
2346 return ram_save_multifd_page(rs, block, offset);
2347 }
2348
2349 return ram_save_page(rs, pss, last_stage);
2350}
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
2371 bool last_stage)
2372{
2373 int tmppages, pages = 0;
2374 size_t pagesize_bits =
2375 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2376
2377 if (!qemu_ram_is_migratable(pss->block)) {
2378 error_report("block %s should not be migrated !", pss->block->idstr);
2379 return 0;
2380 }
2381
2382 do {
2383
2384 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2385 pss->page++;
2386 continue;
2387 }
2388
2389 tmppages = ram_save_target_page(rs, pss, last_stage);
2390 if (tmppages < 0) {
2391 return tmppages;
2392 }
2393
2394 pages += tmppages;
2395 if (pss->block->unsentmap) {
2396 clear_bit(pss->page, pss->block->unsentmap);
2397 }
2398
2399 pss->page++;
2400 } while ((pss->page & (pagesize_bits - 1)) &&
2401 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
2402
2403
2404 pss->page--;
2405 return pages;
2406}
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423static int ram_find_and_save_block(RAMState *rs, bool last_stage)
2424{
2425 PageSearchStatus pss;
2426 int pages = 0;
2427 bool again, found;
2428
2429
2430 if (!ram_bytes_total()) {
2431 return pages;
2432 }
2433
2434 pss.block = rs->last_seen_block;
2435 pss.page = rs->last_page;
2436 pss.complete_round = false;
2437
2438 if (!pss.block) {
2439 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2440 }
2441
2442 do {
2443 again = true;
2444 found = get_queued_page(rs, &pss);
2445
2446 if (!found) {
2447
2448 found = find_dirty_block(rs, &pss, &again);
2449 }
2450
2451 if (found) {
2452 pages = ram_save_host_page(rs, &pss, last_stage);
2453 }
2454 } while (!pages && again);
2455
2456 rs->last_seen_block = pss.block;
2457 rs->last_page = pss.page;
2458
2459 return pages;
2460}
2461
2462void acct_update_position(QEMUFile *f, size_t size, bool zero)
2463{
2464 uint64_t pages = size / TARGET_PAGE_SIZE;
2465
2466 if (zero) {
2467 ram_counters.duplicate += pages;
2468 } else {
2469 ram_counters.normal += pages;
2470 ram_counters.transferred += size;
2471 qemu_update_position(f, size);
2472 }
2473}
2474
2475uint64_t ram_bytes_total(void)
2476{
2477 RAMBlock *block;
2478 uint64_t total = 0;
2479
2480 rcu_read_lock();
2481 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2482 total += block->used_length;
2483 }
2484 rcu_read_unlock();
2485 return total;
2486}
2487
/* Allocate the one-target-page buffer XBZRLE.decoded_buf. */
static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
2492
/* Free XBZRLE.decoded_buf and reset the pointer to NULL. */
static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}
2498
2499static void ram_state_cleanup(RAMState **rsp)
2500{
2501 if (*rsp) {
2502 migration_page_queue_free(*rsp);
2503 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2504 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2505 g_free(*rsp);
2506 *rsp = NULL;
2507 }
2508}
2509
2510static void xbzrle_cleanup(void)
2511{
2512 XBZRLE_cache_lock();
2513 if (XBZRLE.cache) {
2514 cache_fini(XBZRLE.cache);
2515 g_free(XBZRLE.encoded_buf);
2516 g_free(XBZRLE.current_buf);
2517 g_free(XBZRLE.zero_target_page);
2518 XBZRLE.cache = NULL;
2519 XBZRLE.encoded_buf = NULL;
2520 XBZRLE.current_buf = NULL;
2521 XBZRLE.zero_target_page = NULL;
2522 }
2523 XBZRLE_cache_unlock();
2524}
2525
2526static void ram_save_cleanup(void *opaque)
2527{
2528 RAMState **rsp = opaque;
2529 RAMBlock *block;
2530
2531
2532
2533
2534 memory_global_dirty_log_stop();
2535
2536 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2537 g_free(block->bmap);
2538 block->bmap = NULL;
2539 g_free(block->unsentmap);
2540 block->unsentmap = NULL;
2541 }
2542
2543 xbzrle_cleanup();
2544 compress_threads_save_cleanup();
2545 ram_state_cleanup(rsp);
2546}
2547
2548static void ram_state_reset(RAMState *rs)
2549{
2550 rs->last_seen_block = NULL;
2551 rs->last_sent_block = NULL;
2552 rs->last_page = 0;
2553 rs->last_version = ram_list.version;
2554 rs->ram_bulk_stage = true;
2555}
2556
2557#define MAX_WAIT 50
2558
2559
2560
2561
2562
2563
/*
 * Debug helper: print a bitmap to stderr, up to 128 bits per line, with
 * '1' for a set bit and '.' for a clear bit.  Only lines that contain at
 * least one bit differing from @expected are printed, each prefixed with
 * the index of its first bit in hex.
 *
 * @todump: bitmap to dump
 * @expected: the bit value considered uninteresting
 * @pages: number of bits in the bitmap
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    char linebuf[129];
    int64_t linelen = 128;
    int64_t cur;

    for (cur = 0; cur < pages; cur += linelen) {
        bool mismatch = false;
        int64_t col;

        /* Shorten the final line so it stops at the end of the bitmap. */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }

        for (col = 0; col < linelen; col++) {
            bool bit = test_bit(cur + col, todump);

            linebuf[col] = bit ? '1' : '.';
            if (bit != expected) {
                mismatch = true;
            }
        }

        if (mismatch) {
            linebuf[col] = '\0';
            fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
2592
2593
2594
2595void ram_postcopy_migrated_memory_release(MigrationState *ms)
2596{
2597 struct RAMBlock *block;
2598
2599 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2600 unsigned long *bitmap = block->bmap;
2601 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2602 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2603
2604 while (run_start < range) {
2605 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
2606 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
2607 (run_end - run_start) << TARGET_PAGE_BITS);
2608 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2609 }
2610 }
2611}
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627static int postcopy_send_discard_bm_ram(MigrationState *ms,
2628 PostcopyDiscardState *pds,
2629 RAMBlock *block)
2630{
2631 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2632 unsigned long current;
2633 unsigned long *unsentmap = block->unsentmap;
2634
2635 for (current = 0; current < end; ) {
2636 unsigned long one = find_next_bit(unsentmap, end, current);
2637
2638 if (one <= end) {
2639 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2640 unsigned long discard_length;
2641
2642 if (zero >= end) {
2643 discard_length = end - one;
2644 } else {
2645 discard_length = zero - one;
2646 }
2647 if (discard_length) {
2648 postcopy_discard_send_range(ms, pds, one, discard_length);
2649 }
2650 current = one + discard_length;
2651 } else {
2652 current = one;
2653 }
2654 }
2655
2656 return 0;
2657}
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672static int postcopy_each_ram_send_discard(MigrationState *ms)
2673{
2674 struct RAMBlock *block;
2675 int ret;
2676
2677 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2678 PostcopyDiscardState *pds =
2679 postcopy_discard_send_init(ms, block->idstr);
2680
2681
2682
2683
2684
2685
2686 ret = postcopy_send_discard_bm_ram(ms, pds, block);
2687 postcopy_discard_send_finish(ms, pds);
2688 if (ret) {
2689 return ret;
2690 }
2691 }
2692
2693 return 0;
2694}
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
/**
 * postcopy_chunk_hostpages_pass: canonicalize one bitmap in host pages
 *
 * Walks runs in either the unsentmap (@unsent_pass true: runs of clear
 * bits, i.e. sent pages) or the dirty bitmap (@unsent_pass false: runs of
 * set bits).  Whenever a run starts or ends in the middle of a host page,
 * every target page of that host page is marked unsent and dirty, and a
 * discard for the whole host page is sent where needed.
 *
 * Nothing is done for blocks whose page size equals TARGET_PAGE_SIZE.
 *
 * @ms: current migration state
 * @unsent_pass: if true, walk the unsentmap; if false, the dirty bitmap
 * @block: block that contains the pages to fix up
 * @pds: discard state passed through to postcopy_discard_send_range()
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned long *unsentmap = block->unsentmap;
    /* Number of target pages per host page of this block. */
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Host and target pages coincide: nothing can be partial. */
        return;
    }

    if (unsent_pass) {
        /* Find the first sent page (clear bit in unsentmap). */
        run_start = find_next_zero_bit(unsentmap, pages, 0);
    } else {
        /* Find the first dirty page. */
        run_start = find_next_bit(bitmap, pages, 0);
    }

    while (run_start < pages) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * A run that starts part-way into a host page means that host
         * page needs fixing up.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* Resume the search at the start of the next host page. */
            run_start = run_start + host_ratio;
        } else {
            /* The run starts on a host-page boundary: find where it ends. */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, pages, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
            }
            /*
             * A run that ends part-way into a host page means that host
             * page needs fixing up.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /* Resume the search at the start of the next host page. */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * The run covers whole host pages only: no fixup, resume
                 * the search just past its end.
                 */
                run_start = run_end + 1;
            }
        }

        if (do_fixup) {
            unsigned long page;

            /*
             * On the dirty pass the discard is skipped when the first
             * page of the host page is already marked unsent.
             */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* Ask for the whole host page to be discarded. */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Mark every target page of the host page unsent and dirty. */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* The whole host page has to be sent (again). */
                set_bit(page, unsentmap);

                /*
                 * Only pages that were not dirty before add to the
                 * dirty page count.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        if (unsent_pass) {
            /* Find the next sent page for the next iteration. */
            run_start = find_next_zero_bit(unsentmap, pages, run_start);
        } else {
            /* Find the next dirty page for the next iteration. */
            run_start = find_next_bit(bitmap, pages, run_start);
        }
    }
}
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
/**
 * postcopy_chunk_hostpages: canonicalize a block's bitmaps in host pages
 *
 * Runs postcopy_chunk_hostpages_pass() twice on @block within a single
 * discard state: first over the unsentmap, then over the dirty bitmap.
 *
 * Returns zero on success (there is currently no failure path)
 *
 * @ms: current migration state
 * @block: block to process
 */
static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
{
    PostcopyDiscardState *pds =
        postcopy_discard_send_init(ms, block->idstr);

    /* First pass: fix up partially sent host pages. */
    postcopy_chunk_hostpages_pass(ms, true, block, pds);
    /* Second pass: fix up partially dirty host pages. */
    postcopy_chunk_hostpages_pass(ms, false, block, pds);

    postcopy_discard_send_finish(ms, pds);
    return 0;
}
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2870{
2871 RAMState *rs = ram_state;
2872 RAMBlock *block;
2873 int ret;
2874
2875 rcu_read_lock();
2876
2877
2878 migration_bitmap_sync(rs);
2879
2880
2881 rs->last_seen_block = NULL;
2882 rs->last_sent_block = NULL;
2883 rs->last_page = 0;
2884
2885 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2886 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2887 unsigned long *bitmap = block->bmap;
2888 unsigned long *unsentmap = block->unsentmap;
2889
2890 if (!unsentmap) {
2891
2892
2893
2894
2895 error_report("migration ram resized during precopy phase");
2896 rcu_read_unlock();
2897 return -EINVAL;
2898 }
2899
2900 ret = postcopy_chunk_hostpages(ms, block);
2901 if (ret) {
2902 rcu_read_unlock();
2903 return ret;
2904 }
2905
2906
2907
2908
2909 bitmap_or(unsentmap, unsentmap, bitmap, pages);
2910#ifdef DEBUG_POSTCOPY
2911 ram_debug_dump_bitmap(unsentmap, true, pages);
2912#endif
2913 }
2914 trace_ram_postcopy_send_discard_bitmap();
2915
2916 ret = postcopy_each_ram_send_discard(ms);
2917 rcu_read_unlock();
2918
2919 return ret;
2920}
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2933{
2934 int ret = -1;
2935
2936 trace_ram_discard_range(rbname, start, length);
2937
2938 rcu_read_lock();
2939 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2940
2941 if (!rb) {
2942 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2943 goto err;
2944 }
2945
2946
2947
2948
2949
2950 if (rb->receivedmap) {
2951 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2952 length >> qemu_target_page_bits());
2953 }
2954
2955 ret = ram_block_discard_range(rb, start, length);
2956
2957err:
2958 rcu_read_unlock();
2959
2960 return ret;
2961}
2962
2963
2964
2965
2966
2967static int xbzrle_init(void)
2968{
2969 Error *local_err = NULL;
2970
2971 if (!migrate_use_xbzrle()) {
2972 return 0;
2973 }
2974
2975 XBZRLE_cache_lock();
2976
2977 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2978 if (!XBZRLE.zero_target_page) {
2979 error_report("%s: Error allocating zero page", __func__);
2980 goto err_out;
2981 }
2982
2983 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2984 TARGET_PAGE_SIZE, &local_err);
2985 if (!XBZRLE.cache) {
2986 error_report_err(local_err);
2987 goto free_zero_page;
2988 }
2989
2990 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2991 if (!XBZRLE.encoded_buf) {
2992 error_report("%s: Error allocating encoded_buf", __func__);
2993 goto free_cache;
2994 }
2995
2996 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2997 if (!XBZRLE.current_buf) {
2998 error_report("%s: Error allocating current_buf", __func__);
2999 goto free_encoded_buf;
3000 }
3001
3002
3003 XBZRLE_cache_unlock();
3004 return 0;
3005
3006free_encoded_buf:
3007 g_free(XBZRLE.encoded_buf);
3008 XBZRLE.encoded_buf = NULL;
3009free_cache:
3010 cache_fini(XBZRLE.cache);
3011 XBZRLE.cache = NULL;
3012free_zero_page:
3013 g_free(XBZRLE.zero_target_page);
3014 XBZRLE.zero_target_page = NULL;
3015err_out:
3016 XBZRLE_cache_unlock();
3017 return -ENOMEM;
3018}
3019
3020static int ram_state_init(RAMState **rsp)
3021{
3022 *rsp = g_try_new0(RAMState, 1);
3023
3024 if (!*rsp) {
3025 error_report("%s: Init ramstate fail", __func__);
3026 return -1;
3027 }
3028
3029 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3030 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3031 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
3032
3033
3034
3035
3036
3037 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
3038
3039 ram_state_reset(*rsp);
3040
3041 return 0;
3042}
3043
3044static void ram_list_init_bitmaps(void)
3045{
3046 RAMBlock *block;
3047 unsigned long pages;
3048
3049
3050 if (ram_bytes_total()) {
3051 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3052 pages = block->max_length >> TARGET_PAGE_BITS;
3053 block->bmap = bitmap_new(pages);
3054 bitmap_set(block->bmap, 0, pages);
3055 if (migrate_postcopy_ram()) {
3056 block->unsentmap = bitmap_new(pages);
3057 bitmap_set(block->unsentmap, 0, pages);
3058 }
3059 }
3060 }
3061}
3062
/*
 * Create the migration bitmaps, start global dirty logging and perform a
 * first bitmap sync for @rs.
 *
 * The three steps run with the iothread lock, the ramlist lock and the RCU
 * read lock held (taken in that order and released in the reverse order).
 */
static void ram_init_bitmaps(RAMState *rs)
{
    /* Lock order: iothread, then ramlist, then RCU read side. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();

    ram_list_init_bitmaps();
    memory_global_dirty_log_start();
    migration_bitmap_sync(rs);

    /* Release in the reverse order of acquisition. */
    rcu_read_unlock();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
3078
3079static int ram_init_all(RAMState **rsp)
3080{
3081 if (ram_state_init(rsp)) {
3082 return -1;
3083 }
3084
3085 if (xbzrle_init()) {
3086 ram_state_cleanup(rsp);
3087 return -1;
3088 }
3089
3090 ram_init_bitmaps(*rsp);
3091
3092 return 0;
3093}
3094
3095static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3096{
3097 RAMBlock *block;
3098 uint64_t pages = 0;
3099
3100
3101
3102
3103
3104
3105
3106 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3107 pages += bitmap_count_one(block->bmap,
3108 block->used_length >> TARGET_PAGE_BITS);
3109 }
3110
3111
3112 rs->migration_dirty_pages = pages;
3113
3114 rs->last_seen_block = NULL;
3115 rs->last_sent_block = NULL;
3116 rs->last_page = 0;
3117 rs->last_version = ram_list.version;
3118
3119
3120
3121
3122 rs->ram_bulk_stage = false;
3123
3124
3125 rs->f = out;
3126
3127 trace_ram_state_resume_prepare(pages);
3128}
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145static int ram_save_setup(QEMUFile *f, void *opaque)
3146{
3147 RAMState **rsp = opaque;
3148 RAMBlock *block;
3149
3150 if (compress_threads_save_setup()) {
3151 return -1;
3152 }
3153
3154
3155 if (!migration_in_colo_state()) {
3156 if (ram_init_all(rsp) != 0) {
3157 compress_threads_save_cleanup();
3158 return -1;
3159 }
3160 }
3161 (*rsp)->f = f;
3162
3163 rcu_read_lock();
3164
3165 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
3166
3167 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3168 qemu_put_byte(f, strlen(block->idstr));
3169 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3170 qemu_put_be64(f, block->used_length);
3171 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3172 qemu_put_be64(f, block->page_size);
3173 }
3174 }
3175
3176 rcu_read_unlock();
3177
3178 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3179 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3180
3181 multifd_send_sync_main();
3182 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3183 qemu_fflush(f);
3184
3185 return 0;
3186}
3187
3188
3189
3190
3191
3192
3193
3194
3195
/*
 * Save-side iteration handler for RAM: send pages until the rate limit is
 * hit, no page is left, an error occurs, or too much time has elapsed.
 *
 * @f: stream to write to
 * @opaque: pointer to the RAMState pointer
 *
 * Returns: a negative value if the stream carries an error; otherwise 1
 * when ram_find_and_save_block() reported that nothing was left to send in
 * this call, and 0 in all other cases.
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /*
         * While block migration's bulk phase is active no RAM is sent in
         * this call; only the end-of-section marker below is written.
         */
        goto out;
    }

    rcu_read_lock();
    /* The RAM list changed since the last call: restart the scan state. */
    if (ram_list.version != rs->last_version) {
        ram_state_reset(rs);
    }

    /* Read barrier issued after the version check above. */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    /*
     * Keep going while the rate limit allows it, or while page requests
     * are still queued in src_page_requests.
     */
    while ((ret = qemu_file_rate_limit(f)) == 0 ||
            !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        int pages;

        if (qemu_file_get_error(f)) {
            break;
        }

        pages = ram_find_and_save_block(rs, false);
        /* Zero pages sent: nothing left for this call. */
        if (pages == 0) {
            done = 1;
            break;
        }

        /* A negative count is an error; record it on the stream. */
        if (pages < 0) {
            qemu_file_set_error(f, pages);
            break;
        }

        rs->target_page_count += pages;

        /*
         * Only look at the clock once every 64 iterations.  t1 is the
         * elapsed time in milliseconds (ns / 1000000); stop the loop when
         * it exceeds MAX_WAIT.
         */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                trace_ram_save_iterate_big_wait(t1, i);
                break;
            }
        }
        i++;
    }
    rcu_read_unlock();

    /* Called after the RCU read section has been left. */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    multifd_send_sync_main();
out:
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);
    /* Account for the 8-byte end-of-section marker just written. */
    ram_counters.transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return done;
}
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292static int ram_save_complete(QEMUFile *f, void *opaque)
3293{
3294 RAMState **temp = opaque;
3295 RAMState *rs = *temp;
3296 int ret = 0;
3297
3298 rcu_read_lock();
3299
3300 if (!migration_in_postcopy()) {
3301 migration_bitmap_sync(rs);
3302 }
3303
3304 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3305
3306
3307
3308
3309 while (true) {
3310 int pages;
3311
3312 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
3313
3314 if (pages == 0) {
3315 break;
3316 }
3317 if (pages < 0) {
3318 ret = pages;
3319 break;
3320 }
3321 }
3322
3323 flush_compressed_data(rs);
3324 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
3325
3326 rcu_read_unlock();
3327
3328 multifd_send_sync_main();
3329 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3330 qemu_fflush(f);
3331
3332 return ret;
3333}
3334
3335static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
3336 uint64_t *res_precopy_only,
3337 uint64_t *res_compatible,
3338 uint64_t *res_postcopy_only)
3339{
3340 RAMState **temp = opaque;
3341 RAMState *rs = *temp;
3342 uint64_t remaining_size;
3343
3344 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3345
3346 if (!migration_in_postcopy() &&
3347 remaining_size < max_size) {
3348 qemu_mutex_lock_iothread();
3349 rcu_read_lock();
3350 migration_bitmap_sync(rs);
3351 rcu_read_unlock();
3352 qemu_mutex_unlock_iothread();
3353 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3354 }
3355
3356 if (migrate_postcopy_ram()) {
3357
3358 *res_compatible += remaining_size;
3359 } else {
3360 *res_precopy_only += remaining_size;
3361 }
3362}
3363
3364static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3365{
3366 unsigned int xh_len;
3367 int xh_flags;
3368 uint8_t *loaded_data;
3369
3370
3371 xh_flags = qemu_get_byte(f);
3372 xh_len = qemu_get_be16(f);
3373
3374 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3375 error_report("Failed to load XBZRLE page - wrong compression!");
3376 return -1;
3377 }
3378
3379 if (xh_len > TARGET_PAGE_SIZE) {
3380 error_report("Failed to load XBZRLE page - len overflow!");
3381 return -1;
3382 }
3383 loaded_data = XBZRLE.decoded_buf;
3384
3385
3386 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
3387
3388
3389 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
3390 TARGET_PAGE_SIZE) == -1) {
3391 error_report("Failed to load XBZRLE page - decode error!");
3392 return -1;
3393 }
3394
3395 return 0;
3396}
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
3409{
3410 static RAMBlock *block = NULL;
3411 char id[256];
3412 uint8_t len;
3413
3414 if (flags & RAM_SAVE_FLAG_CONTINUE) {
3415 if (!block) {
3416 error_report("Ack, bad migration stream!");
3417 return NULL;
3418 }
3419 return block;
3420 }
3421
3422 len = qemu_get_byte(f);
3423 qemu_get_buffer(f, (uint8_t *)id, len);
3424 id[len] = 0;
3425
3426 block = qemu_ram_block_by_name(id);
3427 if (!block) {
3428 error_report("Can't find block %s", id);
3429 return NULL;
3430 }
3431
3432 if (!qemu_ram_is_migratable(block)) {
3433 error_report("block %s should not be migrated !", id);
3434 return NULL;
3435 }
3436
3437 return block;
3438}
3439
3440static inline void *host_from_ram_block_offset(RAMBlock *block,
3441 ram_addr_t offset)
3442{
3443 if (!offset_in_ramblock(block, offset)) {
3444 return NULL;
3445 }
3446
3447 return block->host + offset;
3448}
3449
3450static inline void *colo_cache_from_block_offset(RAMBlock *block,
3451 ram_addr_t offset)
3452{
3453 if (!offset_in_ramblock(block, offset)) {
3454 return NULL;
3455 }
3456 if (!block->colo_cache) {
3457 error_report("%s: colo_cache is NULL in block :%s",
3458 __func__, block->idstr);
3459 return NULL;
3460 }
3461
3462
3463
3464
3465
3466
3467 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3468 ram_state->migration_dirty_pages++;
3469 }
3470 return block->colo_cache + offset;
3471}
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
/*
 * Fill @size bytes at @host with the byte @ch.
 *
 * The write is skipped when @ch is zero and the destination range is
 * already entirely zero.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Already zero: leave the memory untouched. */
        return;
    }

    memset(host, ch, size);
}
3489
3490
3491static int
3492qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3493 const uint8_t *source, size_t source_len)
3494{
3495 int err;
3496
3497 err = inflateReset(stream);
3498 if (err != Z_OK) {
3499 return -1;
3500 }
3501
3502 stream->avail_in = source_len;
3503 stream->next_in = (uint8_t *)source;
3504 stream->avail_out = dest_len;
3505 stream->next_out = dest;
3506
3507 err = inflate(stream, Z_NO_FLUSH);
3508 if (err != Z_STREAM_END) {
3509 return -1;
3510 }
3511
3512 return stream->total_out;
3513}
3514
3515static void *do_data_decompress(void *opaque)
3516{
3517 DecompressParam *param = opaque;
3518 unsigned long pagesize;
3519 uint8_t *des;
3520 int len, ret;
3521
3522 qemu_mutex_lock(¶m->mutex);
3523 while (!param->quit) {
3524 if (param->des) {
3525 des = param->des;
3526 len = param->len;
3527 param->des = 0;
3528 qemu_mutex_unlock(¶m->mutex);
3529
3530 pagesize = TARGET_PAGE_SIZE;
3531
3532 ret = qemu_uncompress_data(¶m->stream, des, pagesize,
3533 param->compbuf, len);
3534 if (ret < 0 && migrate_get_current()->decompress_error_check) {
3535 error_report("decompress data failed");
3536 qemu_file_set_error(decomp_file, ret);
3537 }
3538
3539 qemu_mutex_lock(&decomp_done_lock);
3540 param->done = true;
3541 qemu_cond_signal(&decomp_done_cond);
3542 qemu_mutex_unlock(&decomp_done_lock);
3543
3544 qemu_mutex_lock(¶m->mutex);
3545 } else {
3546 qemu_cond_wait(¶m->cond, ¶m->mutex);
3547 }
3548 }
3549 qemu_mutex_unlock(¶m->mutex);
3550
3551 return NULL;
3552}
3553
3554static int wait_for_decompress_done(void)
3555{
3556 int idx, thread_count;
3557
3558 if (!migrate_use_compression()) {
3559 return 0;
3560 }
3561
3562 thread_count = migrate_decompress_threads();
3563 qemu_mutex_lock(&decomp_done_lock);
3564 for (idx = 0; idx < thread_count; idx++) {
3565 while (!decomp_param[idx].done) {
3566 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3567 }
3568 }
3569 qemu_mutex_unlock(&decomp_done_lock);
3570 return qemu_file_get_error(decomp_file);
3571}
3572
3573static void compress_threads_load_cleanup(void)
3574{
3575 int i, thread_count;
3576
3577 if (!migrate_use_compression()) {
3578 return;
3579 }
3580 thread_count = migrate_decompress_threads();
3581 for (i = 0; i < thread_count; i++) {
3582
3583
3584
3585
3586 if (!decomp_param[i].compbuf) {
3587 break;
3588 }
3589
3590 qemu_mutex_lock(&decomp_param[i].mutex);
3591 decomp_param[i].quit = true;
3592 qemu_cond_signal(&decomp_param[i].cond);
3593 qemu_mutex_unlock(&decomp_param[i].mutex);
3594 }
3595 for (i = 0; i < thread_count; i++) {
3596 if (!decomp_param[i].compbuf) {
3597 break;
3598 }
3599
3600 qemu_thread_join(decompress_threads + i);
3601 qemu_mutex_destroy(&decomp_param[i].mutex);
3602 qemu_cond_destroy(&decomp_param[i].cond);
3603 inflateEnd(&decomp_param[i].stream);
3604 g_free(decomp_param[i].compbuf);
3605 decomp_param[i].compbuf = NULL;
3606 }
3607 g_free(decompress_threads);
3608 g_free(decomp_param);
3609 decompress_threads = NULL;
3610 decomp_param = NULL;
3611 decomp_file = NULL;
3612}
3613
3614static int compress_threads_load_setup(QEMUFile *f)
3615{
3616 int i, thread_count;
3617
3618 if (!migrate_use_compression()) {
3619 return 0;
3620 }
3621
3622 thread_count = migrate_decompress_threads();
3623 decompress_threads = g_new0(QemuThread, thread_count);
3624 decomp_param = g_new0(DecompressParam, thread_count);
3625 qemu_mutex_init(&decomp_done_lock);
3626 qemu_cond_init(&decomp_done_cond);
3627 decomp_file = f;
3628 for (i = 0; i < thread_count; i++) {
3629 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3630 goto exit;
3631 }
3632
3633 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3634 qemu_mutex_init(&decomp_param[i].mutex);
3635 qemu_cond_init(&decomp_param[i].cond);
3636 decomp_param[i].done = true;
3637 decomp_param[i].quit = false;
3638 qemu_thread_create(decompress_threads + i, "decompress",
3639 do_data_decompress, decomp_param + i,
3640 QEMU_THREAD_JOINABLE);
3641 }
3642 return 0;
3643exit:
3644 compress_threads_load_cleanup();
3645 return -1;
3646}
3647
3648static void decompress_data_with_multi_threads(QEMUFile *f,
3649 void *host, int len)
3650{
3651 int idx, thread_count;
3652
3653 thread_count = migrate_decompress_threads();
3654 qemu_mutex_lock(&decomp_done_lock);
3655 while (true) {
3656 for (idx = 0; idx < thread_count; idx++) {
3657 if (decomp_param[idx].done) {
3658 decomp_param[idx].done = false;
3659 qemu_mutex_lock(&decomp_param[idx].mutex);
3660 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
3661 decomp_param[idx].des = host;
3662 decomp_param[idx].len = len;
3663 qemu_cond_signal(&decomp_param[idx].cond);
3664 qemu_mutex_unlock(&decomp_param[idx].mutex);
3665 break;
3666 }
3667 }
3668 if (idx < thread_count) {
3669 break;
3670 } else {
3671 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3672 }
3673 }
3674 qemu_mutex_unlock(&decomp_done_lock);
3675}
3676
3677
3678
3679
3680
3681
3682int colo_init_ram_cache(void)
3683{
3684 RAMBlock *block;
3685
3686 rcu_read_lock();
3687 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3688 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3689 NULL,
3690 false);
3691 if (!block->colo_cache) {
3692 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3693 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3694 block->used_length);
3695 goto out_locked;
3696 }
3697 memcpy(block->colo_cache, block->host, block->used_length);
3698 }
3699 rcu_read_unlock();
3700
3701
3702
3703
3704
3705 if (ram_bytes_total()) {
3706 RAMBlock *block;
3707
3708 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3709 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3710
3711 block->bmap = bitmap_new(pages);
3712 bitmap_set(block->bmap, 0, pages);
3713 }
3714 }
3715 ram_state = g_new0(RAMState, 1);
3716 ram_state->migration_dirty_pages = 0;
3717 memory_global_dirty_log_start();
3718
3719 return 0;
3720
3721out_locked:
3722
3723 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3724 if (block->colo_cache) {
3725 qemu_anon_ram_free(block->colo_cache, block->used_length);
3726 block->colo_cache = NULL;
3727 }
3728 }
3729
3730 rcu_read_unlock();
3731 return -errno;
3732}
3733
3734
3735void colo_release_ram_cache(void)
3736{
3737 RAMBlock *block;
3738
3739 memory_global_dirty_log_stop();
3740 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3741 g_free(block->bmap);
3742 block->bmap = NULL;
3743 }
3744
3745 rcu_read_lock();
3746
3747 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3748 if (block->colo_cache) {
3749 qemu_anon_ram_free(block->colo_cache, block->used_length);
3750 block->colo_cache = NULL;
3751 }
3752 }
3753
3754 rcu_read_unlock();
3755 g_free(ram_state);
3756 ram_state = NULL;
3757}
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767static int ram_load_setup(QEMUFile *f, void *opaque)
3768{
3769 if (compress_threads_load_setup(f)) {
3770 return -1;
3771 }
3772
3773 xbzrle_load_setup();
3774 ramblock_recv_map_init();
3775
3776 return 0;
3777}
3778
3779static int ram_load_cleanup(void *opaque)
3780{
3781 RAMBlock *rb;
3782
3783 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
3784 if (ramblock_is_pmem(rb)) {
3785 pmem_persist(rb->host, rb->used_length);
3786 }
3787 }
3788
3789 xbzrle_load_cleanup();
3790 compress_threads_load_cleanup();
3791
3792 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
3793 g_free(rb->receivedmap);
3794 rb->receivedmap = NULL;
3795 }
3796
3797 return 0;
3798}
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3812{
3813 return postcopy_ram_incoming_init(mis);
3814}
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
/*
 * Load RAM pages from @f while postcopy is running.
 *
 * Records are read until one carries RAM_SAVE_FLAG_EOS or an error occurs.
 * Target pages are gathered in the temporary page obtained from
 * postcopy_get_tmp_page() and, once the last target page of a block-sized
 * page has arrived, placed with postcopy_place_page() or
 * postcopy_place_page_zero().
 *
 * Returns: 0 on success, a negative value on stream errors, illegal
 * offsets, out-of-sequence pages, unknown flag combinations or placement
 * failures.
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matches_target_page_size = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page in which a full block-sized page is assembled. */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);

        /*
         * Check the stream for errors before interpreting the value just
         * read.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        /* The bits below the target page mask carry the record flags. */
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
            /*
             * The data for this target page goes into the temporary page
             * at the same offset that host has inside its block-sized
             * page (block->page_size).
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* host aligned to block->page_size: a new page starts here. */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* Otherwise it must directly follow the previous one. */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                  host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }


            /*
             * Placement is due once the end of this target page coincides
             * with the end of the block-sized page.
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                                     (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            /* One fill byte follows; a non-zero byte clears all_zero. */
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!matches_target_page_size) {
                /* Block page larger than a target page: copy into place. */
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * Block page equals a target page: read "in place".
                 * place_source is passed by reference, so the callee may
                 * presumably repoint it at its own buffer - confirm
                 * against qemu_get_buffer_in_place().
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* End of section. */
            multifd_recv_sync_main();
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

        /* Pick up any stream error raised while reading the payload. */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            /* host is the last target page; step back to the page start. */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
        }
    }

    return ret;
}
3966
3967static bool postcopy_is_advised(void)
3968{
3969 PostcopyState ps = postcopy_state_get();
3970 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3971}
3972
3973static bool postcopy_is_running(void)
3974{
3975 PostcopyState ps = postcopy_state_get();
3976 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3977}
3978
3979
3980
3981
3982
3983static void colo_flush_ram_cache(void)
3984{
3985 RAMBlock *block = NULL;
3986 void *dst_host;
3987 void *src_host;
3988 unsigned long offset = 0;
3989
3990 memory_global_dirty_log_sync();
3991 rcu_read_lock();
3992 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3993 migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
3994 }
3995 rcu_read_unlock();
3996
3997 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
3998 rcu_read_lock();
3999 block = QLIST_FIRST_RCU(&ram_list.blocks);
4000
4001 while (block) {
4002 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4003
4004 if (offset << TARGET_PAGE_BITS >= block->used_length) {
4005 offset = 0;
4006 block = QLIST_NEXT_RCU(block, next);
4007 } else {
4008 migration_bitmap_clear_dirty(ram_state, block, offset);
4009 dst_host = block->host + (offset << TARGET_PAGE_BITS);
4010 src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
4011 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4012 }
4013 }
4014
4015 rcu_read_unlock();
4016 trace_colo_flush_ram_cache_end();
4017}
4018
/*
 * Load-side handler for one RAM section of the migration stream.
 *
 * @f: stream to read from
 * @opaque: not used
 * @version_id: section version; anything other than 4 is rejected
 *
 * When postcopy is running the whole section is handled by
 * ram_load_postcopy().  Otherwise records are parsed until one carries
 * RAM_SAVE_FLAG_EOS or an error occurs.  Before returning, the function
 * waits for the decompression threads, and - on success while in COLO
 * state - flushes the COLO cache into guest RAM.
 *
 * Returns: 0 on success, non-zero on failure.
 */
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0, invalid_flags = 0;
    /* Counts calls to this function; only used for tracing. */
    static uint64_t seq_iter;
    int len = 0;
    /* Both postcopy predicates are sampled once, before parsing starts. */
    bool postcopy_running = postcopy_is_running();
    bool postcopy_advised = postcopy_is_advised();

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* Compressed pages are not acceptable when compression is off. */
    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }

    /* The RCU read lock is held across the whole parsing loop. */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        /* The bits below the target page mask carry the record flags. */
        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        /* Every page-carrying record needs a destination pointer. */
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            /* In COLO state pages land in the COLO cache, not guest RAM. */
            if (migration_incoming_in_colo_state()) {
                host = colo_cache_from_block_offset(block, addr);
            } else {
                host = host_from_ram_block_offset(block, addr);
            }
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }

            if (!migration_incoming_in_colo_state()) {
                ramblock_recv_bitmap_set(block, host);
            }

            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /*
             * Block list: addr holds the total RAM size; each entry is a
             * length byte, the block id, and the block length, which is
             * subtracted from the total until nothing remains.
             */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated !", id);
                    ret = -EINVAL;
                } else if (block) {
                    /* Sizes differ: try to resize the local block. */
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /*
                     * With postcopy advised, a block whose page size is
                     * not the host page size is followed by the remote
                     * page size, which has to match the local one.
                     */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            /* One fill byte for the whole target page. */
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            /* A raw target page. */
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            /* 32-bit length, then that many bytes of compressed data. */
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* End of section. */
            multifd_recv_sync_main();
            break;
        default:
            /* Anything else is only valid if the hook flag is set. */
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        /* Pick up any stream error raised while handling this record. */
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    /* Outstanding decompression work must finish before we return. */
    ret |= wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);

    if (!ret && migration_incoming_in_colo_state()) {
        colo_flush_ram_cache();
    }
    return ret;
}
4201
4202static bool ram_has_postcopy(void *opaque)
4203{
4204 RAMBlock *rb;
4205 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
4206 if (ramblock_is_pmem(rb)) {
4207 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4208 "is not supported now!", rb->idstr, rb->host);
4209 return false;
4210 }
4211 }
4212
4213 return migrate_postcopy_ram();
4214}
4215
4216
4217static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4218{
4219 RAMBlock *block;
4220 QEMUFile *file = s->to_dst_file;
4221 int ramblock_count = 0;
4222
4223 trace_ram_dirty_bitmap_sync_start();
4224
4225 RAMBLOCK_FOREACH_MIGRATABLE(block) {
4226 qemu_savevm_send_recv_bitmap(file, block->idstr);
4227 trace_ram_dirty_bitmap_request(block->idstr);
4228 ramblock_count++;
4229 }
4230
4231 trace_ram_dirty_bitmap_sync_wait();
4232
4233
4234 while (ramblock_count--) {
4235 qemu_sem_wait(&s->rp_state.rp_sem);
4236 }
4237
4238 trace_ram_dirty_bitmap_sync_complete();
4239
4240 return 0;
4241}
4242
/*
 * Signal that one block's bitmap has been reloaded by posting
 * s->rp_state.rp_sem once.
 */
static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}
4247
4248
4249
4250
4251
4252
4253int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4254{
4255 int ret = -EINVAL;
4256 QEMUFile *file = s->rp_state.from_dst_file;
4257 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
4258 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
4259 uint64_t size, end_mark;
4260
4261 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4262
4263 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4264 error_report("%s: incorrect state %s", __func__,
4265 MigrationStatus_str(s->state));
4266 return -EINVAL;
4267 }
4268
4269
4270
4271
4272
4273 local_size = ROUND_UP(local_size, 8);
4274
4275
4276 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4277
4278 size = qemu_get_be64(file);
4279
4280
4281 if (size != local_size) {
4282 error_report("%s: ramblock '%s' bitmap size mismatch "
4283 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4284 block->idstr, size, local_size);
4285 ret = -EINVAL;
4286 goto out;
4287 }
4288
4289 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4290 end_mark = qemu_get_be64(file);
4291
4292 ret = qemu_file_get_error(file);
4293 if (ret || size != local_size) {
4294 error_report("%s: read bitmap failed for ramblock '%s': %d"
4295 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4296 __func__, block->idstr, ret, local_size, size);
4297 ret = -EIO;
4298 goto out;
4299 }
4300
4301 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4302 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
4303 __func__, block->idstr, end_mark);
4304 ret = -EINVAL;
4305 goto out;
4306 }
4307
4308
4309
4310
4311
4312 bitmap_from_le(block->bmap, le_bitmap, nbits);
4313
4314
4315
4316
4317
4318 bitmap_complement(block->bmap, block->bmap, nbits);
4319
4320 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4321
4322
4323
4324
4325
4326 ram_dirty_bitmap_reload_notify(s);
4327
4328 ret = 0;
4329out:
4330 g_free(le_bitmap);
4331 return ret;
4332}
4333
4334static int ram_resume_prepare(MigrationState *s, void *opaque)
4335{
4336 RAMState *rs = *(RAMState **)opaque;
4337 int ret;
4338
4339 ret = ram_dirty_bitmap_sync_all(s, rs);
4340 if (ret) {
4341 return ret;
4342 }
4343
4344 ram_state_resume_prepare(rs, s->to_dst_file);
4345
4346 return 0;
4347}
4348
4349static SaveVMHandlers savevm_ram_handlers = {
4350 .save_setup = ram_save_setup,
4351 .save_live_iterate = ram_save_iterate,
4352 .save_live_complete_postcopy = ram_save_complete,
4353 .save_live_complete_precopy = ram_save_complete,
4354 .has_postcopy = ram_has_postcopy,
4355 .save_live_pending = ram_save_pending,
4356 .load_state = ram_load,
4357 .save_cleanup = ram_save_cleanup,
4358 .load_setup = ram_load_setup,
4359 .load_cleanup = ram_load_cleanup,
4360 .resume_prepare = ram_resume_prepare,
4361};
4362
/*
 * Initialise the XBZRLE lock and register the RAM handlers as the live
 * savevm section "ram" (instance 0, version 4), with a pointer to the
 * global ram_state as the opaque argument.
 */
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
4368