1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include "qemu/osdep.h"
30#include "qemu/cutils.h"
31#include "qemu/bitops.h"
32#include "qemu/bitmap.h"
33#include "qemu/main-loop.h"
34#include "xbzrle.h"
35#include "ram.h"
36#include "migration.h"
37#include "migration/register.h"
38#include "migration/misc.h"
39#include "qemu-file.h"
40#include "postcopy-ram.h"
41#include "page_cache.h"
42#include "qemu/error-report.h"
43#include "qapi/error.h"
44#include "qapi/qapi-types-migration.h"
45#include "qapi/qapi-events-migration.h"
46#include "qapi/qmp/qerror.h"
47#include "trace.h"
48#include "exec/ram_addr.h"
49#include "exec/target_page.h"
50#include "qemu/rcu_queue.h"
51#include "migration/colo.h"
52#include "block.h"
53#include "sysemu/cpu-throttle.h"
54#include "savevm.h"
55#include "qemu/iov.h"
56#include "multifd.h"
57#include "sysemu/runstate.h"
58
59#if defined(__linux__)
60#include "qemu/userfaultfd.h"
61#endif
62
63
64
65
66
67
68
69
70
71
/*
 * Flag bits that are OR'ed into the 64-bit offset word written by
 * save_page_header() in front of each page record.
 */
/* Not referenced in the visible part of this file. */
#define RAM_SAVE_FLAG_FULL 0x01
/* All-zero page; set by save_zero_page_to_file(). */
#define RAM_SAVE_FLAG_ZERO 0x02
/* Not referenced in the visible part of this file. */
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
/* Raw page contents follow; set by save_normal_page(). */
#define RAM_SAVE_FLAG_PAGE 0x08
/* Not referenced in the visible part of this file. */
#define RAM_SAVE_FLAG_EOS 0x10
/* Same block as the previous header, so no block name follows. */
#define RAM_SAVE_FLAG_CONTINUE 0x20
/* XBZRLE-encoded page follows; set by save_xbzrle_page(). */
#define RAM_SAVE_FLAG_XBZRLE 0x40

/* Compressed page follows; set by do_compress_ram_page(). */
#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
81
/*
 * Report whether the @size bytes starting at @p are all zero.
 * Thin wrapper around buffer_is_zero().
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    const bool all_zero = buffer_is_zero(p, size);

    return all_zero;
}
86
/* XBZRLE statistics; updated by save_xbzrle_page() and migration_update_rates() */
XBZRLECacheStats xbzrle_counters;

/*
 * State shared by the XBZRLE helpers in this file.  The helpers take
 * XBZRLE.lock through XBZRLE_cache_lock()/XBZRLE_cache_unlock().
 */
static struct {
    /* output buffer handed to xbzrle_encode_buffer() */
    uint8_t *encoded_buf;
    /* private copy of the page that is being encoded */
    uint8_t *current_buf;
    /* cache of page contents, looked up by page address */
    PageCache *cache;
    QemuMutex lock;
    /* page inserted into the cache by xbzrle_cache_zero_page() */
    uint8_t *zero_target_page;
    /*
     * Not referenced in the visible part of this file; presumably the
     * buffer used when decoding on the destination -- TODO confirm.
     */
    uint8_t *decoded_buf;
} XBZRLE;
104
105static void XBZRLE_cache_lock(void)
106{
107 if (migrate_use_xbzrle()) {
108 qemu_mutex_lock(&XBZRLE.lock);
109 }
110}
111
112static void XBZRLE_cache_unlock(void)
113{
114 if (migrate_use_xbzrle()) {
115 qemu_mutex_unlock(&XBZRLE.lock);
116 }
117}
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132int xbzrle_cache_resize(uint64_t new_size, Error **errp)
133{
134 PageCache *new_cache;
135 int64_t ret = 0;
136
137
138 if (new_size != (size_t)new_size) {
139 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
140 "exceeding address space");
141 return -1;
142 }
143
144 if (new_size == migrate_xbzrle_cache_size()) {
145
146 return 0;
147 }
148
149 XBZRLE_cache_lock();
150
151 if (XBZRLE.cache != NULL) {
152 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
153 if (!new_cache) {
154 ret = -1;
155 goto out;
156 }
157
158 cache_fini(XBZRLE.cache);
159 XBZRLE.cache = new_cache;
160 }
161out:
162 XBZRLE_cache_unlock();
163 return ret;
164}
165
166bool ramblock_is_ignored(RAMBlock *block)
167{
168 return !qemu_ram_is_migratable(block) ||
169 (migrate_ignore_shared() && qemu_ram_is_shared(block));
170}
171
172#undef RAMBLOCK_FOREACH
173
174int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
175{
176 RAMBlock *block;
177 int ret = 0;
178
179 RCU_READ_LOCK_GUARD();
180
181 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
182 ret = func(block, opaque);
183 if (ret) {
184 break;
185 }
186 }
187 return ret;
188}
189
190static void ramblock_recv_map_init(void)
191{
192 RAMBlock *rb;
193
194 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
195 assert(!rb->receivedmap);
196 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
197 }
198}
199
200int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
201{
202 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
203 rb->receivedmap);
204}
205
206bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
207{
208 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
209}
210
211void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
212{
213 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
214}
215
216void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
217 size_t nr)
218{
219 bitmap_set_atomic(rb->receivedmap,
220 ramblock_recv_bitmap_offset(host_addr, rb),
221 nr);
222}
223
/* Marker written after the bitmap payload by ramblock_recv_bitmap_send(). */
#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
225
226
227
228
229
230
231int64_t ramblock_recv_bitmap_send(QEMUFile *file,
232 const char *block_name)
233{
234 RAMBlock *block = qemu_ram_block_by_name(block_name);
235 unsigned long *le_bitmap, nbits;
236 uint64_t size;
237
238 if (!block) {
239 error_report("%s: invalid block name: %s", __func__, block_name);
240 return -1;
241 }
242
243 nbits = block->postcopy_length >> TARGET_PAGE_BITS;
244
245
246
247
248
249
250 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
251
252
253
254
255
256
257 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
258
259
260 size = DIV_ROUND_UP(nbits, 8);
261
262
263
264
265
266
267
268 size = ROUND_UP(size, 8);
269
270 qemu_put_be64(file, size);
271 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
272
273
274
275
276 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
277 qemu_fflush(file);
278
279 g_free(le_bitmap);
280
281 if (qemu_file_get_error(file)) {
282 return qemu_file_get_error(file);
283 }
284
285 return size + sizeof(size);
286}
287
288
289
290
291
/*
 * One queued request for a range of pages of a RAM block.  Entries live on
 * RAMState.src_page_requests and are consumed one target page at a time by
 * unqueue_page().
 */
struct RAMSrcPageRequest {
    /* block the requested range belongs to */
    RAMBlock *rb;
    /* offset of the next page to hand out */
    hwaddr offset;
    /* bytes of the request that are still outstanding */
    hwaddr len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
299
300
/* State of the RAM migration source. */
struct RAMState {
    /* stream that the save_* helpers write page data to */
    QEMUFile *f;
    /* userfaultfd descriptor; stored by ram_write_tracking_start() */
    int uffdio_fd;
    /* with last_page: where find_dirty_block() stops after a full pass */
    RAMBlock *last_seen_block;
    /* block named by the most recent full header; see save_page_header() */
    RAMBlock *last_sent_block;
    /* page index compared against the search cursor in find_dirty_block() */
    ram_addr_t last_page;
    /* not referenced in the visible part of this file */
    uint32_t last_version;
    /* count of over-threshold periods; see migration_trigger_throttle() */
    int dirty_rate_high_cnt;

    /* Values below describe the period since the last rate update. */
    /* QEMU_CLOCK_REALTIME milliseconds at which the period started */
    int64_t time_last_bitmap_sync;
    /* ram_counters.transferred at the start of the period */
    uint64_t bytes_xfer_prev;
    /* pages found dirty by bitmap syncs during the period */
    uint64_t num_dirty_pages_period;
    /* xbzrle_counters.cache_miss as of the last migration_update_rates() */
    uint64_t xbzrle_cache_miss_prev;
    /* xbzrle_counters.pages as of the last migration_update_rates() */
    uint64_t xbzrle_pages_prev;
    /* xbzrle_counters.bytes as of the last migration_update_rates() */
    uint64_t xbzrle_bytes_prev;
    /* ram_save_page() tries XBZRLE only when set; see find_dirty_block() */
    bool xbzrle_enabled;

    /* Snapshots of compression_counters taken by migration_update_rates(). */
    /* compression_counters.busy at the last update */
    uint64_t compress_thread_busy_prev;
    /* compression_counters.compressed_size at the last update */
    uint64_t compressed_size_prev;
    /* compression_counters.pages at the last update */
    uint64_t compress_pages_prev;

    /* target_page_count as of the last rate update */
    uint64_t target_page_count_prev;
    /* running page counter; it is updated outside the visible part */
    uint64_t target_page_count;
    /* number of pages currently set in the migration dirty bitmaps */
    uint64_t migration_dirty_pages;
    /* held by migration_bitmap_sync() while the block bitmaps are synced */
    QemuMutex bitmap_mutex;
    /* not referenced in the visible part of this file */
    RAMBlock *last_req_rb;
    /* taken by unqueue_page() while it works on src_page_requests */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;
355
/* The RAM migration state; NULL-checked by ram_bytes_remaining(). */
static RAMState *ram_state;

/* Notifiers that precopy_notify() calls. */
static NotifierWithReturnList precopy_notifier_list;
359
360void precopy_infrastructure_init(void)
361{
362 notifier_with_return_list_init(&precopy_notifier_list);
363}
364
365void precopy_add_notifier(NotifierWithReturn *n)
366{
367 notifier_with_return_list_add(&precopy_notifier_list, n);
368}
369
370void precopy_remove_notifier(NotifierWithReturn *n)
371{
372 notifier_with_return_remove(n);
373}
374
375int precopy_notify(PrecopyNotifyReason reason, Error **errp)
376{
377 PrecopyNotifyData pnd;
378 pnd.reason = reason;
379 pnd.errp = errp;
380
381 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
382}
383
384uint64_t ram_bytes_remaining(void)
385{
386 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
387 0;
388}
389
/* Global RAM migration counters (transferred, normal, duplicate, ...). */
MigrationStats ram_counters;
391
392
/* Cursor used while searching for the next dirty page to send. */
struct PageSearchStatus {
    /* block currently being searched */
    RAMBlock *block;
    /* page index within that block */
    unsigned long page;
    /* set by find_dirty_block() once the search has wrapped to the first block */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
402
/* Statistics for multi-thread compression (pages, compressed_size, busy, ...). */
CompressionStats compression_counters;
404
/* State of one compression worker thread (see do_data_compress()). */
struct CompressParam {
    /* worker is idle and its output can be collected; set under comp_done_lock */
    bool done;
    /* asks the worker to leave its loop */
    bool quit;
    /* whether the last page handled was sent as a zero page */
    bool zero_page;
    /* buffer file the worker writes to; drained with qemu_put_qemu_file() */
    QEMUFile *file;
    /* mutex the worker holds while it reads quit/block/offset */
    QemuMutex mutex;
    /* signalled when a job is assigned or quit is requested */
    QemuCond cond;
    /* pending job: block to compress from; NULL means no job */
    RAMBlock *block;
    /* pending job: offset of the page within block */
    ram_addr_t offset;

    /* zlib deflate stream; set up by deflateInit(), torn down by deflateEnd() */
    z_stream stream;
    /* TARGET_PAGE_SIZE scratch buffer that the page is copied into first */
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;
420
/*
 * Per-thread decompression state.  None of its users are in the visible
 * part of this file, so the field notes below are assumptions based on the
 * parallel CompressParam layout -- TODO confirm against the load path.
 */
struct DecompressParam {
    /* presumably: the thread is idle */
    bool done;
    /* presumably: asks the thread to exit */
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    /* presumably: destination address for the decompressed page */
    void *des;
    /* presumably: buffer holding the compressed input */
    uint8_t *compbuf;
    /* presumably: length of the data in compbuf */
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;
432
/* Arrays with one entry per compression thread; see compress_threads_save_setup() */
static CompressParam *comp_param;
static QemuThread *compress_threads;

/*
 * comp_done_cond is signalled by a worker when it finishes a page;
 * comp_done_lock is held while the workers' done flags are read or written.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;

/* Empty ops table passed to qemu_fopen_ops() for the per-thread buffer files. */
static const QEMUFileOps empty_ops = { };

/* Decompression-side state; its users are outside the visible part of this file. */
static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
449
450static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
451 ram_addr_t offset, uint8_t *source_buf);
452
453static void *do_data_compress(void *opaque)
454{
455 CompressParam *param = opaque;
456 RAMBlock *block;
457 ram_addr_t offset;
458 bool zero_page;
459
460 qemu_mutex_lock(¶m->mutex);
461 while (!param->quit) {
462 if (param->block) {
463 block = param->block;
464 offset = param->offset;
465 param->block = NULL;
466 qemu_mutex_unlock(¶m->mutex);
467
468 zero_page = do_compress_ram_page(param->file, ¶m->stream,
469 block, offset, param->originbuf);
470
471 qemu_mutex_lock(&comp_done_lock);
472 param->done = true;
473 param->zero_page = zero_page;
474 qemu_cond_signal(&comp_done_cond);
475 qemu_mutex_unlock(&comp_done_lock);
476
477 qemu_mutex_lock(¶m->mutex);
478 } else {
479 qemu_cond_wait(¶m->cond, ¶m->mutex);
480 }
481 }
482 qemu_mutex_unlock(¶m->mutex);
483
484 return NULL;
485}
486
487static void compress_threads_save_cleanup(void)
488{
489 int i, thread_count;
490
491 if (!migrate_use_compression() || !comp_param) {
492 return;
493 }
494
495 thread_count = migrate_compress_threads();
496 for (i = 0; i < thread_count; i++) {
497
498
499
500
501 if (!comp_param[i].file) {
502 break;
503 }
504
505 qemu_mutex_lock(&comp_param[i].mutex);
506 comp_param[i].quit = true;
507 qemu_cond_signal(&comp_param[i].cond);
508 qemu_mutex_unlock(&comp_param[i].mutex);
509
510 qemu_thread_join(compress_threads + i);
511 qemu_mutex_destroy(&comp_param[i].mutex);
512 qemu_cond_destroy(&comp_param[i].cond);
513 deflateEnd(&comp_param[i].stream);
514 g_free(comp_param[i].originbuf);
515 qemu_fclose(comp_param[i].file);
516 comp_param[i].file = NULL;
517 }
518 qemu_mutex_destroy(&comp_done_lock);
519 qemu_cond_destroy(&comp_done_cond);
520 g_free(compress_threads);
521 g_free(comp_param);
522 compress_threads = NULL;
523 comp_param = NULL;
524}
525
526static int compress_threads_save_setup(void)
527{
528 int i, thread_count;
529
530 if (!migrate_use_compression()) {
531 return 0;
532 }
533 thread_count = migrate_compress_threads();
534 compress_threads = g_new0(QemuThread, thread_count);
535 comp_param = g_new0(CompressParam, thread_count);
536 qemu_cond_init(&comp_done_cond);
537 qemu_mutex_init(&comp_done_lock);
538 for (i = 0; i < thread_count; i++) {
539 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
540 if (!comp_param[i].originbuf) {
541 goto exit;
542 }
543
544 if (deflateInit(&comp_param[i].stream,
545 migrate_compress_level()) != Z_OK) {
546 g_free(comp_param[i].originbuf);
547 goto exit;
548 }
549
550
551
552
553 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
554 comp_param[i].done = true;
555 comp_param[i].quit = false;
556 qemu_mutex_init(&comp_param[i].mutex);
557 qemu_cond_init(&comp_param[i].cond);
558 qemu_thread_create(compress_threads + i, "compress",
559 do_data_compress, comp_param + i,
560 QEMU_THREAD_JOINABLE);
561 }
562 return 0;
563
564exit:
565 compress_threads_save_cleanup();
566 return -1;
567}
568
569
570
571
572
573
574
575
576
577
578
579
580
581static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
582 ram_addr_t offset)
583{
584 size_t size, len;
585
586 if (block == rs->last_sent_block) {
587 offset |= RAM_SAVE_FLAG_CONTINUE;
588 }
589 qemu_put_be64(f, offset);
590 size = 8;
591
592 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
593 len = strlen(block->idstr);
594 qemu_put_byte(f, len);
595 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
596 size += 1 + len;
597 rs->last_sent_block = block;
598 }
599 return size;
600}
601
602
603
604
605
606
607
608
609
610
611static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
612 uint64_t bytes_dirty_threshold)
613{
614 MigrationState *s = migrate_get_current();
615 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
616 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
617 bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
618 int pct_max = s->parameters.max_cpu_throttle;
619
620 uint64_t throttle_now = cpu_throttle_get_percentage();
621 uint64_t cpu_now, cpu_ideal, throttle_inc;
622
623
624 if (!cpu_throttle_active()) {
625 cpu_throttle_set(pct_initial);
626 } else {
627
628 if (!pct_tailslow) {
629 throttle_inc = pct_increment;
630 } else {
631
632
633 cpu_now = 100 - throttle_now;
634 cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
635 bytes_dirty_period);
636 throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
637 }
638 cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
639 }
640}
641
642
643
644
645
646
647
648
649
650
651
652
653
654static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
655{
656 if (!rs->xbzrle_enabled) {
657 return;
658 }
659
660
661
662 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
663 ram_counters.dirty_sync_count);
664}
665
/* Byte written by save_xbzrle_page() right after the page header. */
#define ENCODING_FLAG_XBZRLE 0x1
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
/**
 * save_xbzrle_page: try to send a page as an XBZRLE delta
 *
 * Returns: 1 when an encoded page was written,
 *          0 when the page is identical to the cached copy (nothing sent),
 *          -1 when the page was not in the cache or the encoding
 *          overflowed; the caller then sends the page normally
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents; may be
 *                redirected to the cached copy of the page
 * @current_addr: address of the page, used as the cache key
 * @block: block that contains the page
 * @offset: offset of the page inside the block
 * @last_stage: when true, the cache is not updated
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /*
                 * Point the caller at the cached copy, so that what gets
                 * sent is what the cache holds.
                 */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        /* Cache miss: the page has to be sent unencoded either way. */
        return -1;
    }

    /* Cache hit: count the page as an XBZRLE page. */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* Encode from a private copy of the page. */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* Delta between the cached contents and the private copy. */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Unless this is the last stage, bring the cache up to date with the
     * copy that was just encoded.  This also runs on overflow
     * (encoded_len == -1).
     */
    if (!last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);

        /*
         * On overflow the caller sends the page itself; make it send the
         * copy that now sits in the cache.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Record layout: header, flag byte, be16 length, encoded data. */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;

    /*
     * The 8 bytes of the be64 offset word written by save_page_header()
     * are left out of the XBZRLE byte counter.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}
768
769
770
771
772
773
774
775
776
777
778static inline
779unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
780 unsigned long start)
781{
782 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
783 unsigned long *bitmap = rb->bmap;
784
785 if (ramblock_is_ignored(rb)) {
786 return size;
787 }
788
789 return find_next_bit(bitmap, size, start);
790}
791
792static void migration_clear_memory_region_dirty_bitmap(RAMState *rs,
793 RAMBlock *rb,
794 unsigned long page)
795{
796 uint8_t shift;
797 hwaddr size, start;
798
799 if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
800 return;
801 }
802
803 shift = rb->clear_bmap_shift;
804
805
806
807
808
809
810
811
812 assert(shift >= 6);
813
814 size = 1ULL << (TARGET_PAGE_BITS + shift);
815 start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
816 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
817 memory_region_clear_dirty_bitmap(rb->mr, start, size);
818}
819
820static void
821migration_clear_memory_region_dirty_bitmap_range(RAMState *rs,
822 RAMBlock *rb,
823 unsigned long start,
824 unsigned long npages)
825{
826 unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
827 unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
828 unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);
829
830
831
832
833
834 for (i = chunk_start; i < chunk_end; i += chunk_pages) {
835 migration_clear_memory_region_dirty_bitmap(rs, rb, i);
836 }
837}
838
839static inline bool migration_bitmap_clear_dirty(RAMState *rs,
840 RAMBlock *rb,
841 unsigned long page)
842{
843 bool ret;
844
845
846
847
848
849
850
851
852
853 migration_clear_memory_region_dirty_bitmap(rs, rb, page);
854
855 ret = test_and_clear_bit(page, rb->bmap);
856 if (ret) {
857 rs->migration_dirty_pages--;
858 }
859
860 return ret;
861}
862
863
864static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
865{
866 uint64_t new_dirty_pages =
867 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
868
869 rs->migration_dirty_pages += new_dirty_pages;
870 rs->num_dirty_pages_period += new_dirty_pages;
871}
872
873
874
875
876
877
878
879
880
881
882uint64_t ram_pagesize_summary(void)
883{
884 RAMBlock *block;
885 uint64_t summary = 0;
886
887 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
888 summary |= block->page_size;
889 }
890
891 return summary;
892}
893
894uint64_t ram_get_total_transferred_pages(void)
895{
896 return ram_counters.normal + ram_counters.duplicate +
897 compression_counters.pages + xbzrle_counters.pages;
898}
899
/*
 * Recompute the rate statistics for the period that ends at @end_time.
 *
 * @rs: current RAM state; holds the counter snapshots of the previous period
 * @end_time: end of the period, in the same unit as
 *            rs->time_last_bitmap_sync (milliseconds)
 */
static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* Pages dirtied per second; the divisor is the period in milliseconds. */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    /* The remaining rates are per page handled; skip them if there were none. */
    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        /* No XBZRLE pages or no bytes this period: report a rate of 0. */
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        /* The rate and its snapshots only move when data was compressed. */
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Ratio of uncompressed size to compressed size. */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}
951
952static void migration_trigger_throttle(RAMState *rs)
953{
954 MigrationState *s = migrate_get_current();
955 uint64_t threshold = s->parameters.throttle_trigger_threshold;
956
957 uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
958 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
959 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
960
961
962
963
964 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
965
966
967
968
969
970
971 if ((bytes_dirty_period > bytes_dirty_threshold) &&
972 (++rs->dirty_rate_high_cnt >= 2)) {
973 trace_migration_throttle();
974 rs->dirty_rate_high_cnt = 0;
975 mig_throttle_guest_down(bytes_dirty_period,
976 bytes_dirty_threshold);
977 }
978 }
979}
980
/*
 * Sync the dirty bitmap of every non-ignored RAM block and, when more than
 * one second has passed since the current period started, run the throttle
 * check, update the rate statistics and start a new period.
 */
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    /* First sync: start the measuring period now. */
    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    /* The per-block bitmaps are updated with bitmap_mutex held. */
    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* More than 1000 ms since the period started: evaluate and reset it. */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* Start the next period. */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}
1026
1027static void migration_bitmap_sync_precopy(RAMState *rs)
1028{
1029 Error *local_err = NULL;
1030
1031
1032
1033
1034
1035 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1036 error_report_err(local_err);
1037 local_err = NULL;
1038 }
1039
1040 migration_bitmap_sync(rs);
1041
1042 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1043 error_report_err(local_err);
1044 }
1045}
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1059 RAMBlock *block, ram_addr_t offset)
1060{
1061 uint8_t *p = block->host + offset;
1062 int len = 0;
1063
1064 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1065 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1066 qemu_put_byte(file, 0);
1067 len += 1;
1068 }
1069 return len;
1070}
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1082{
1083 int len = save_zero_page_to_file(rs, rs->f, block, offset);
1084
1085 if (len) {
1086 ram_counters.duplicate++;
1087 ram_counters.transferred += len;
1088 return 1;
1089 }
1090 return -1;
1091}
1092
1093static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
1094{
1095 if (!migrate_release_ram() || !migration_in_postcopy()) {
1096 return;
1097 }
1098
1099 ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
1100}
1101
1102
1103
1104
1105
1106
1107
1108
1109static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1110 int *pages)
1111{
1112 uint64_t bytes_xmit = 0;
1113 int ret;
1114
1115 *pages = -1;
1116 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1117 &bytes_xmit);
1118 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1119 return false;
1120 }
1121
1122 if (bytes_xmit) {
1123 ram_counters.transferred += bytes_xmit;
1124 *pages = 1;
1125 }
1126
1127 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1128 return true;
1129 }
1130
1131 if (bytes_xmit > 0) {
1132 ram_counters.normal++;
1133 } else if (bytes_xmit == 0) {
1134 ram_counters.duplicate++;
1135 }
1136
1137 return true;
1138}
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1152 uint8_t *buf, bool async)
1153{
1154 ram_counters.transferred += save_page_header(rs, rs->f, block,
1155 offset | RAM_SAVE_FLAG_PAGE);
1156 if (async) {
1157 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1158 migrate_release_ram() &
1159 migration_in_postcopy());
1160 } else {
1161 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1162 }
1163 ram_counters.transferred += TARGET_PAGE_SIZE;
1164 ram_counters.normal++;
1165 return 1;
1166}
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1182{
1183 int pages = -1;
1184 uint8_t *p;
1185 bool send_async = true;
1186 RAMBlock *block = pss->block;
1187 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
1188 ram_addr_t current_addr = block->offset + offset;
1189
1190 p = block->host + offset;
1191 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
1192
1193 XBZRLE_cache_lock();
1194 if (rs->xbzrle_enabled && !migration_in_postcopy()) {
1195 pages = save_xbzrle_page(rs, &p, current_addr, block,
1196 offset, last_stage);
1197 if (!last_stage) {
1198
1199
1200
1201 send_async = false;
1202 }
1203 }
1204
1205
1206 if (pages == -1) {
1207 pages = save_normal_page(rs, block, offset, p, send_async);
1208 }
1209
1210 XBZRLE_cache_unlock();
1211
1212 return pages;
1213}
1214
1215static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1216 ram_addr_t offset)
1217{
1218 if (multifd_queue_page(rs->f, block, offset) < 0) {
1219 return -1;
1220 }
1221 ram_counters.normal++;
1222
1223 return 1;
1224}
1225
1226static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1227 ram_addr_t offset, uint8_t *source_buf)
1228{
1229 RAMState *rs = ram_state;
1230 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
1231 bool zero_page = false;
1232 int ret;
1233
1234 if (save_zero_page_to_file(rs, f, block, offset)) {
1235 zero_page = true;
1236 goto exit;
1237 }
1238
1239 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
1240
1241
1242
1243
1244
1245
1246 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1247 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1248 if (ret < 0) {
1249 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
1250 error_report("compressed data failed!");
1251 return false;
1252 }
1253
1254exit:
1255 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1256 return zero_page;
1257}
1258
1259static void
1260update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1261{
1262 ram_counters.transferred += bytes_xmit;
1263
1264 if (param->zero_page) {
1265 ram_counters.duplicate++;
1266 return;
1267 }
1268
1269
1270 compression_counters.compressed_size += bytes_xmit - 8;
1271 compression_counters.pages++;
1272}
1273
1274static bool save_page_use_compression(RAMState *rs);
1275
/*
 * Wait until every compression thread is idle, then move whatever each
 * thread has buffered into the migration stream.  Does nothing when
 * save_page_use_compression() says compression is not in use.
 */
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    /* Wait for all workers to report done. */
    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    /* Drain each worker's buffer file, skipping workers told to quit. */
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * NOTE(review): the counts are updated even when len is 0;
             * confirm that an idle worker with an empty buffer is meant
             * to be counted.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
1307
1308static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1309 ram_addr_t offset)
1310{
1311 param->block = block;
1312 param->offset = offset;
1313}
1314
/*
 * Hand the page at @offset inside @block to an idle compression thread.
 *
 * The data that thread buffered for its previous page is first moved into
 * the migration stream and accounted for.
 *
 * Returns 1 when the page was handed to a thread.  Returns -1 when no
 * thread was idle and migrate_compress_wait_thread() is false; when it is
 * true the function waits on comp_done_cond and tries again.
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            /* Claim the worker, then collect what it produced last time. */
            comp_param[idx].done = false;
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /* Post the new job and wake the worker. */
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /* No idle worker: wait for one if configured to, else give up. */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1362{
1363 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1364 if (pss->complete_round && pss->block == rs->last_seen_block &&
1365 pss->page >= rs->last_page) {
1366
1367
1368
1369
1370 *again = false;
1371 return false;
1372 }
1373 if (!offset_in_ramblock(pss->block,
1374 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
1375
1376 pss->page = 0;
1377 pss->block = QLIST_NEXT_RCU(pss->block, next);
1378 if (!pss->block) {
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388 flush_compressed_data(rs);
1389
1390
1391 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1392
1393 pss->complete_round = true;
1394
1395 if (migrate_use_xbzrle()) {
1396 rs->xbzrle_enabled = true;
1397 }
1398 }
1399
1400 *again = true;
1401 return false;
1402 } else {
1403
1404 *again = true;
1405
1406 return true;
1407 }
1408}
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1421{
1422 RAMBlock *block = NULL;
1423
1424 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
1425 return NULL;
1426 }
1427
1428 QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
1429 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1430 struct RAMSrcPageRequest *entry =
1431 QSIMPLEQ_FIRST(&rs->src_page_requests);
1432 block = entry->rb;
1433 *offset = entry->offset;
1434
1435 if (entry->len > TARGET_PAGE_SIZE) {
1436 entry->len -= TARGET_PAGE_SIZE;
1437 entry->offset += TARGET_PAGE_SIZE;
1438 } else {
1439 memory_region_unref(block->mr);
1440 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1441 g_free(entry);
1442 migration_consume_urgent_request();
1443 }
1444 }
1445
1446 return block;
1447}
1448
1449#if defined(__linux__)
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1461{
1462 struct uffd_msg uffd_msg;
1463 void *page_address;
1464 RAMBlock *block;
1465 int res;
1466
1467 if (!migrate_background_snapshot()) {
1468 return NULL;
1469 }
1470
1471 res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
1472 if (res <= 0) {
1473 return NULL;
1474 }
1475
1476 page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
1477 block = qemu_ram_block_from_host(page_address, false, offset);
1478 assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
1479 return block;
1480}
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1493 unsigned long start_page)
1494{
1495 int res = 0;
1496
1497
1498 if (pss->block->flags & RAM_UF_WRITEPROTECT) {
1499 void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
1500 uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;
1501
1502
1503 qemu_fflush(rs->f);
1504
1505 res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
1506 false, false);
1507 }
1508
1509 return res;
1510}
1511
1512
1513
1514
1515
1516bool ram_write_tracking_available(void)
1517{
1518 uint64_t uffd_features;
1519 int res;
1520
1521 res = uffd_query_features(&uffd_features);
1522 return (res == 0 &&
1523 (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
1524}
1525
1526
1527
1528
1529
1530
1531bool ram_write_tracking_compatible(void)
1532{
1533 const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
1534 int uffd_fd;
1535 RAMBlock *block;
1536 bool ret = false;
1537
1538
1539 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
1540 if (uffd_fd < 0) {
1541 return false;
1542 }
1543
1544 RCU_READ_LOCK_GUARD();
1545
1546 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1547 uint64_t uffd_ioctls;
1548
1549
1550 if (block->mr->readonly || block->mr->rom_device) {
1551 continue;
1552 }
1553
1554 if (uffd_register_memory(uffd_fd, block->host, block->max_length,
1555 UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
1556 goto out;
1557 }
1558 if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
1559 goto out;
1560 }
1561 }
1562 ret = true;
1563
1564out:
1565 uffd_close_fd(uffd_fd);
1566 return ret;
1567}
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578static void ram_block_populate_pages(RAMBlock *block)
1579{
1580 char *ptr = (char *) block->host;
1581
1582 for (ram_addr_t offset = 0; offset < block->used_length;
1583 offset += qemu_real_host_page_size) {
1584 char tmp = *(ptr + offset);
1585
1586
1587 asm volatile("" : "+r" (tmp));
1588 }
1589}
1590
1591
1592
1593
1594void ram_write_tracking_prepare(void)
1595{
1596 RAMBlock *block;
1597
1598 RCU_READ_LOCK_GUARD();
1599
1600 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1601
1602 if (block->mr->readonly || block->mr->rom_device) {
1603 continue;
1604 }
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614 ram_block_populate_pages(block);
1615 }
1616}
1617
1618
1619
1620
1621
1622
1623int ram_write_tracking_start(void)
1624{
1625 int uffd_fd;
1626 RAMState *rs = ram_state;
1627 RAMBlock *block;
1628
1629
1630 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
1631 if (uffd_fd < 0) {
1632 return uffd_fd;
1633 }
1634 rs->uffdio_fd = uffd_fd;
1635
1636 RCU_READ_LOCK_GUARD();
1637
1638 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1639
1640 if (block->mr->readonly || block->mr->rom_device) {
1641 continue;
1642 }
1643
1644
1645 if (uffd_register_memory(rs->uffdio_fd, block->host,
1646 block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
1647 goto fail;
1648 }
1649
1650 if (uffd_change_protection(rs->uffdio_fd, block->host,
1651 block->max_length, true, false)) {
1652 goto fail;
1653 }
1654 block->flags |= RAM_UF_WRITEPROTECT;
1655 memory_region_ref(block->mr);
1656
1657 trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
1658 block->host, block->max_length);
1659 }
1660
1661 return 0;
1662
1663fail:
1664 error_report("ram_write_tracking_start() failed: restoring initial memory state");
1665
1666 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1667 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
1668 continue;
1669 }
1670
1671
1672
1673
1674 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1675 false, false);
1676 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
1677
1678 block->flags &= ~RAM_UF_WRITEPROTECT;
1679 memory_region_unref(block->mr);
1680 }
1681
1682 uffd_close_fd(uffd_fd);
1683 rs->uffdio_fd = -1;
1684 return -1;
1685}
1686
1687
1688
1689
1690void ram_write_tracking_stop(void)
1691{
1692 RAMState *rs = ram_state;
1693 RAMBlock *block;
1694
1695 RCU_READ_LOCK_GUARD();
1696
1697 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1698 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
1699 continue;
1700 }
1701
1702 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1703 false, false);
1704 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
1705
1706 trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
1707 block->host, block->max_length);
1708
1709
1710 block->flags &= ~RAM_UF_WRITEPROTECT;
1711 memory_region_unref(block->mr);
1712 }
1713
1714
1715 uffd_close_fd(rs->uffdio_fd);
1716 rs->uffdio_fd = -1;
1717}
1718
1719#else
1720
1721
1722static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1723{
1724 (void) rs;
1725 (void) offset;
1726
1727 return NULL;
1728}
1729
1730static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1731 unsigned long start_page)
1732{
1733 (void) rs;
1734 (void) pss;
1735 (void) start_page;
1736
1737 return 0;
1738}
1739
/*
 * ram_write_tracking_available: stub for builds without __linux__
 *
 * Always returns false.
 */
bool ram_write_tracking_available(void)
{
    return false;
}
1744
/*
 * ram_write_tracking_compatible: stub for builds without __linux__
 *
 * Asserts unconditionally; the false return is reached only when
 * assertions are compiled out.
 */
bool ram_write_tracking_compatible(void)
{
    assert(0);
    return false;
}
1750
/*
 * ram_write_tracking_start: stub for builds without __linux__
 *
 * Asserts unconditionally; the -1 return is reached only when assertions
 * are compiled out.
 */
int ram_write_tracking_start(void)
{
    assert(0);
    return -1;
}
1756
/*
 * ram_write_tracking_stop: stub for builds without __linux__
 *
 * Asserts unconditionally.
 */
void ram_write_tracking_stop(void)
{
    assert(0);
}
1761#endif
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1774{
1775 RAMBlock *block;
1776 ram_addr_t offset;
1777 bool dirty;
1778
1779 do {
1780 block = unqueue_page(rs, &offset);
1781
1782
1783
1784
1785
1786
1787 if (block) {
1788 unsigned long page;
1789
1790 page = offset >> TARGET_PAGE_BITS;
1791 dirty = test_bit(page, block->bmap);
1792 if (!dirty) {
1793 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1794 page);
1795 } else {
1796 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1797 }
1798 }
1799
1800 } while (block && !dirty);
1801
1802 if (!block) {
1803
1804
1805
1806
1807 block = poll_fault_page(rs, &offset);
1808 }
1809
1810 if (block) {
1811
1812
1813
1814
1815
1816 pss->block = block;
1817 pss->page = offset >> TARGET_PAGE_BITS;
1818
1819
1820
1821
1822
1823 pss->complete_round = false;
1824 }
1825
1826 return !!block;
1827}
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837static void migration_page_queue_free(RAMState *rs)
1838{
1839 struct RAMSrcPageRequest *mspr, *next_mspr;
1840
1841
1842
1843 RCU_READ_LOCK_GUARD();
1844 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1845 memory_region_unref(mspr->rb->mr);
1846 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1847 g_free(mspr);
1848 }
1849}
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1864{
1865 RAMBlock *ramblock;
1866 RAMState *rs = ram_state;
1867
1868 ram_counters.postcopy_requests++;
1869 RCU_READ_LOCK_GUARD();
1870
1871 if (!rbname) {
1872
1873 ramblock = rs->last_req_rb;
1874
1875 if (!ramblock) {
1876
1877
1878
1879
1880 error_report("ram_save_queue_pages no previous block");
1881 return -1;
1882 }
1883 } else {
1884 ramblock = qemu_ram_block_by_name(rbname);
1885
1886 if (!ramblock) {
1887
1888 error_report("ram_save_queue_pages no block '%s'", rbname);
1889 return -1;
1890 }
1891 rs->last_req_rb = ramblock;
1892 }
1893 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1894 if (!offset_in_ramblock(ramblock, start + len - 1)) {
1895 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1896 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1897 __func__, start, len, ramblock->used_length);
1898 return -1;
1899 }
1900
1901 struct RAMSrcPageRequest *new_entry =
1902 g_malloc0(sizeof(struct RAMSrcPageRequest));
1903 new_entry->rb = ramblock;
1904 new_entry->offset = start;
1905 new_entry->len = len;
1906
1907 memory_region_ref(ramblock->mr);
1908 qemu_mutex_lock(&rs->src_page_req_mutex);
1909 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1910 migration_make_urgent_request();
1911 qemu_mutex_unlock(&rs->src_page_req_mutex);
1912
1913 return 0;
1914}
1915
1916static bool save_page_use_compression(RAMState *rs)
1917{
1918 if (!migrate_use_compression()) {
1919 return false;
1920 }
1921
1922
1923
1924
1925
1926
1927 if (rs->xbzrle_enabled) {
1928 return false;
1929 }
1930
1931 return true;
1932}
1933
1934
1935
1936
1937
1938
1939static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1940{
1941 if (!save_page_use_compression(rs)) {
1942 return false;
1943 }
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955 if (block != rs->last_sent_block) {
1956 flush_compressed_data(rs);
1957 return false;
1958 }
1959
1960 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
1961 return true;
1962 }
1963
1964 compression_counters.busy++;
1965 return false;
1966}
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1978 bool last_stage)
1979{
1980 RAMBlock *block = pss->block;
1981 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
1982 int res;
1983
1984 if (control_save_page(rs, block, offset, &res)) {
1985 return res;
1986 }
1987
1988 if (save_compress_page(rs, block, offset)) {
1989 return 1;
1990 }
1991
1992 res = save_zero_page(rs, block, offset);
1993 if (res > 0) {
1994
1995
1996
1997 if (!save_page_use_compression(rs)) {
1998 XBZRLE_cache_lock();
1999 xbzrle_cache_zero_page(rs, block->offset + offset);
2000 XBZRLE_cache_unlock();
2001 }
2002 ram_release_pages(block->idstr, offset, res);
2003 return res;
2004 }
2005
2006
2007
2008
2009
2010
2011
2012 if (!save_page_use_compression(rs) && migrate_use_multifd()
2013 && !migration_in_postcopy()) {
2014 return ram_save_multifd_page(rs, block, offset);
2015 }
2016
2017 return ram_save_page(rs, pss, last_stage);
2018}
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
2039 bool last_stage)
2040{
2041 int tmppages, pages = 0;
2042 size_t pagesize_bits =
2043 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2044 unsigned long hostpage_boundary =
2045 QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
2046 unsigned long start_page = pss->page;
2047 int res;
2048
2049 if (ramblock_is_ignored(pss->block)) {
2050 error_report("block %s should not be migrated !", pss->block->idstr);
2051 return 0;
2052 }
2053
2054 do {
2055
2056 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2057 tmppages = ram_save_target_page(rs, pss, last_stage);
2058 if (tmppages < 0) {
2059 return tmppages;
2060 }
2061
2062 pages += tmppages;
2063
2064
2065
2066
2067 if (pagesize_bits > 1 && tmppages > 0) {
2068 migration_rate_limit();
2069 }
2070 }
2071 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
2072 } while ((pss->page < hostpage_boundary) &&
2073 offset_in_ramblock(pss->block,
2074 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
2075
2076 pss->page = MIN(pss->page, hostpage_boundary) - 1;
2077
2078 res = ram_save_release_protection(rs, pss, start_page);
2079 return (res < 0 ? res : pages);
2080}
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097static int ram_find_and_save_block(RAMState *rs, bool last_stage)
2098{
2099 PageSearchStatus pss;
2100 int pages = 0;
2101 bool again, found;
2102
2103
2104 if (!ram_bytes_total()) {
2105 return pages;
2106 }
2107
2108 pss.block = rs->last_seen_block;
2109 pss.page = rs->last_page;
2110 pss.complete_round = false;
2111
2112 if (!pss.block) {
2113 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2114 }
2115
2116 do {
2117 again = true;
2118 found = get_queued_page(rs, &pss);
2119
2120 if (!found) {
2121
2122 found = find_dirty_block(rs, &pss, &again);
2123 }
2124
2125 if (found) {
2126 pages = ram_save_host_page(rs, &pss, last_stage);
2127 }
2128 } while (!pages && again);
2129
2130 rs->last_seen_block = pss.block;
2131 rs->last_page = pss.page;
2132
2133 return pages;
2134}
2135
2136void acct_update_position(QEMUFile *f, size_t size, bool zero)
2137{
2138 uint64_t pages = size / TARGET_PAGE_SIZE;
2139
2140 if (zero) {
2141 ram_counters.duplicate += pages;
2142 } else {
2143 ram_counters.normal += pages;
2144 ram_counters.transferred += size;
2145 qemu_update_position(f, size);
2146 }
2147}
2148
2149static uint64_t ram_bytes_total_common(bool count_ignored)
2150{
2151 RAMBlock *block;
2152 uint64_t total = 0;
2153
2154 RCU_READ_LOCK_GUARD();
2155
2156 if (count_ignored) {
2157 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2158 total += block->used_length;
2159 }
2160 } else {
2161 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2162 total += block->used_length;
2163 }
2164 }
2165 return total;
2166}
2167
/*
 * ram_bytes_total: total used length of the non-ignored RAM blocks
 */
uint64_t ram_bytes_total(void)
{
    const bool count_ignored = false;

    return ram_bytes_total_common(count_ignored);
}
2172
2173static void xbzrle_load_setup(void)
2174{
2175 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2176}
2177
2178static void xbzrle_load_cleanup(void)
2179{
2180 g_free(XBZRLE.decoded_buf);
2181 XBZRLE.decoded_buf = NULL;
2182}
2183
2184static void ram_state_cleanup(RAMState **rsp)
2185{
2186 if (*rsp) {
2187 migration_page_queue_free(*rsp);
2188 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2189 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2190 g_free(*rsp);
2191 *rsp = NULL;
2192 }
2193}
2194
2195static void xbzrle_cleanup(void)
2196{
2197 XBZRLE_cache_lock();
2198 if (XBZRLE.cache) {
2199 cache_fini(XBZRLE.cache);
2200 g_free(XBZRLE.encoded_buf);
2201 g_free(XBZRLE.current_buf);
2202 g_free(XBZRLE.zero_target_page);
2203 XBZRLE.cache = NULL;
2204 XBZRLE.encoded_buf = NULL;
2205 XBZRLE.current_buf = NULL;
2206 XBZRLE.zero_target_page = NULL;
2207 }
2208 XBZRLE_cache_unlock();
2209}
2210
2211static void ram_save_cleanup(void *opaque)
2212{
2213 RAMState **rsp = opaque;
2214 RAMBlock *block;
2215
2216
2217 if (!migrate_background_snapshot()) {
2218
2219
2220
2221 memory_global_dirty_log_stop();
2222 }
2223
2224 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2225 g_free(block->clear_bmap);
2226 block->clear_bmap = NULL;
2227 g_free(block->bmap);
2228 block->bmap = NULL;
2229 }
2230
2231 xbzrle_cleanup();
2232 compress_threads_save_cleanup();
2233 ram_state_cleanup(rsp);
2234}
2235
2236static void ram_state_reset(RAMState *rs)
2237{
2238 rs->last_seen_block = NULL;
2239 rs->last_sent_block = NULL;
2240 rs->last_page = 0;
2241 rs->last_version = ram_list.version;
2242 rs->xbzrle_enabled = false;
2243}
2244
2245#define MAX_WAIT 50
2246
2247
2248
2249
2250
2251
/*
 * ram_debug_dump_bitmap: print a bitmap to stderr, up to 128 bits per line
 *
 * Set bits are printed as '1' and clear bits as '.'.  A line is printed
 * only if at least one of its bits differs from @expected.
 *
 * @todump: bitmap to print
 * @expected: value most bits are expected to have
 * @pages: number of bits in the bitmap
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t linelen = 128;
    char linebuf[129];
    int64_t cur;

    for (cur = 0; cur < pages; cur += linelen) {
        bool interesting = false;
        int64_t curb = 0;

        /* Shorten the final line so it stops at the end of the bitmap */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }

        while (curb < linelen) {
            bool bit = test_bit(cur + curb, todump);

            linebuf[curb] = bit ? '1' : '.';
            if (bit != expected) {
                interesting = true;
            }
            curb++;
        }

        if (interesting) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
2280
2281
2282
2283void ram_postcopy_migrated_memory_release(MigrationState *ms)
2284{
2285 struct RAMBlock *block;
2286
2287 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2288 unsigned long *bitmap = block->bmap;
2289 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2290 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2291
2292 while (run_start < range) {
2293 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
2294 ram_discard_range(block->idstr,
2295 ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
2296 ((ram_addr_t)(run_end - run_start))
2297 << TARGET_PAGE_BITS);
2298 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2299 }
2300 }
2301}
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
2314{
2315 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2316 unsigned long current;
2317 unsigned long *bitmap = block->bmap;
2318
2319 for (current = 0; current < end; ) {
2320 unsigned long one = find_next_bit(bitmap, end, current);
2321 unsigned long zero, discard_length;
2322
2323 if (one >= end) {
2324 break;
2325 }
2326
2327 zero = find_next_zero_bit(bitmap, end, one + 1);
2328
2329 if (zero >= end) {
2330 discard_length = end - one;
2331 } else {
2332 discard_length = zero - one;
2333 }
2334 postcopy_discard_send_range(ms, one, discard_length);
2335 current = one + discard_length;
2336 }
2337
2338 return 0;
2339}
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354static int postcopy_each_ram_send_discard(MigrationState *ms)
2355{
2356 struct RAMBlock *block;
2357 int ret;
2358
2359 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2360 postcopy_discard_send_init(ms, block->idstr);
2361
2362
2363
2364
2365
2366
2367 ret = postcopy_send_discard_bm_ram(ms, block);
2368 postcopy_discard_send_finish(ms);
2369 if (ret) {
2370 return ret;
2371 }
2372 }
2373
2374 return 0;
2375}
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
2391{
2392 RAMState *rs = ram_state;
2393 unsigned long *bitmap = block->bmap;
2394 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
2395 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2396 unsigned long run_start;
2397
2398 if (block->page_size == TARGET_PAGE_SIZE) {
2399
2400 return;
2401 }
2402
2403
2404 run_start = find_next_bit(bitmap, pages, 0);
2405
2406 while (run_start < pages) {
2407
2408
2409
2410
2411
2412 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
2413
2414 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
2415
2416
2417
2418
2419
2420 }
2421
2422 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
2423 unsigned long page;
2424 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2425 host_ratio);
2426 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
2427
2428
2429 for (page = fixup_start_addr;
2430 page < fixup_start_addr + host_ratio; page++) {
2431
2432
2433
2434
2435 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2436 }
2437 }
2438
2439
2440 run_start = find_next_bit(bitmap, pages, run_start);
2441 }
2442}
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2459{
2460 postcopy_discard_send_init(ms, block->idstr);
2461
2462
2463
2464
2465 postcopy_chunk_hostpages_pass(ms, block);
2466
2467 postcopy_discard_send_finish(ms);
2468 return 0;
2469}
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2487{
2488 RAMState *rs = ram_state;
2489 RAMBlock *block;
2490 int ret;
2491
2492 RCU_READ_LOCK_GUARD();
2493
2494
2495 migration_bitmap_sync(rs);
2496
2497
2498 rs->last_seen_block = NULL;
2499 rs->last_sent_block = NULL;
2500 rs->last_page = 0;
2501
2502 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2503
2504 ret = postcopy_chunk_hostpages(ms, block);
2505 if (ret) {
2506 return ret;
2507 }
2508
2509#ifdef DEBUG_POSTCOPY
2510 ram_debug_dump_bitmap(block->bmap, true,
2511 block->used_length >> TARGET_PAGE_BITS);
2512#endif
2513 }
2514 trace_ram_postcopy_send_discard_bitmap();
2515
2516 return postcopy_each_ram_send_discard(ms);
2517}
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2530{
2531 trace_ram_discard_range(rbname, start, length);
2532
2533 RCU_READ_LOCK_GUARD();
2534 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2535
2536 if (!rb) {
2537 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2538 return -1;
2539 }
2540
2541
2542
2543
2544
2545 if (rb->receivedmap) {
2546 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2547 length >> qemu_target_page_bits());
2548 }
2549
2550 return ram_block_discard_range(rb, start, length);
2551}
2552
2553
2554
2555
2556
2557static int xbzrle_init(void)
2558{
2559 Error *local_err = NULL;
2560
2561 if (!migrate_use_xbzrle()) {
2562 return 0;
2563 }
2564
2565 XBZRLE_cache_lock();
2566
2567 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2568 if (!XBZRLE.zero_target_page) {
2569 error_report("%s: Error allocating zero page", __func__);
2570 goto err_out;
2571 }
2572
2573 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2574 TARGET_PAGE_SIZE, &local_err);
2575 if (!XBZRLE.cache) {
2576 error_report_err(local_err);
2577 goto free_zero_page;
2578 }
2579
2580 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2581 if (!XBZRLE.encoded_buf) {
2582 error_report("%s: Error allocating encoded_buf", __func__);
2583 goto free_cache;
2584 }
2585
2586 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2587 if (!XBZRLE.current_buf) {
2588 error_report("%s: Error allocating current_buf", __func__);
2589 goto free_encoded_buf;
2590 }
2591
2592
2593 XBZRLE_cache_unlock();
2594 return 0;
2595
2596free_encoded_buf:
2597 g_free(XBZRLE.encoded_buf);
2598 XBZRLE.encoded_buf = NULL;
2599free_cache:
2600 cache_fini(XBZRLE.cache);
2601 XBZRLE.cache = NULL;
2602free_zero_page:
2603 g_free(XBZRLE.zero_target_page);
2604 XBZRLE.zero_target_page = NULL;
2605err_out:
2606 XBZRLE_cache_unlock();
2607 return -ENOMEM;
2608}
2609
2610static int ram_state_init(RAMState **rsp)
2611{
2612 *rsp = g_try_new0(RAMState, 1);
2613
2614 if (!*rsp) {
2615 error_report("%s: Init ramstate fail", __func__);
2616 return -1;
2617 }
2618
2619 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2620 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2621 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2622
2623
2624
2625
2626
2627
2628 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2629 ram_state_reset(*rsp);
2630
2631 return 0;
2632}
2633
2634static void ram_list_init_bitmaps(void)
2635{
2636 MigrationState *ms = migrate_get_current();
2637 RAMBlock *block;
2638 unsigned long pages;
2639 uint8_t shift;
2640
2641
2642 if (ram_bytes_total()) {
2643 shift = ms->clear_bitmap_shift;
2644 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2645 error_report("clear_bitmap_shift (%u) too big, using "
2646 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2647 shift = CLEAR_BITMAP_SHIFT_MAX;
2648 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2649 error_report("clear_bitmap_shift (%u) too small, using "
2650 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2651 shift = CLEAR_BITMAP_SHIFT_MIN;
2652 }
2653
2654 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2655 pages = block->max_length >> TARGET_PAGE_BITS;
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665 block->bmap = bitmap_new(pages);
2666 bitmap_set(block->bmap, 0, pages);
2667 block->clear_bmap_shift = shift;
2668 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
2669 }
2670 }
2671}
2672
2673static void ram_init_bitmaps(RAMState *rs)
2674{
2675
2676 qemu_mutex_lock_iothread();
2677 qemu_mutex_lock_ramlist();
2678
2679 WITH_RCU_READ_LOCK_GUARD() {
2680 ram_list_init_bitmaps();
2681
2682 if (!migrate_background_snapshot()) {
2683 memory_global_dirty_log_start();
2684 migration_bitmap_sync_precopy(rs);
2685 }
2686 }
2687 qemu_mutex_unlock_ramlist();
2688 qemu_mutex_unlock_iothread();
2689}
2690
2691static int ram_init_all(RAMState **rsp)
2692{
2693 if (ram_state_init(rsp)) {
2694 return -1;
2695 }
2696
2697 if (xbzrle_init()) {
2698 ram_state_cleanup(rsp);
2699 return -1;
2700 }
2701
2702 ram_init_bitmaps(*rsp);
2703
2704 return 0;
2705}
2706
2707static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2708{
2709 RAMBlock *block;
2710 uint64_t pages = 0;
2711
2712
2713
2714
2715
2716
2717
2718 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2719 pages += bitmap_count_one(block->bmap,
2720 block->used_length >> TARGET_PAGE_BITS);
2721 }
2722
2723
2724 rs->migration_dirty_pages = pages;
2725
2726 ram_state_reset(rs);
2727
2728
2729 rs->f = out;
2730
2731 trace_ram_state_resume_prepare(pages);
2732}
2733
2734
2735
2736
2737
2738
2739
2740void qemu_guest_free_page_hint(void *addr, size_t len)
2741{
2742 RAMBlock *block;
2743 ram_addr_t offset;
2744 size_t used_len, start, npages;
2745 MigrationState *s = migrate_get_current();
2746
2747
2748 if (!migration_is_setup_or_active(s->state)) {
2749 return;
2750 }
2751
2752 for (; len > 0; len -= used_len, addr += used_len) {
2753 block = qemu_ram_block_from_host(addr, false, &offset);
2754 if (unlikely(!block || offset >= block->used_length)) {
2755
2756
2757
2758
2759
2760 error_report_once("%s unexpected error", __func__);
2761 return;
2762 }
2763
2764 if (len <= block->used_length - offset) {
2765 used_len = len;
2766 } else {
2767 used_len = block->used_length - offset;
2768 }
2769
2770 start = offset >> TARGET_PAGE_BITS;
2771 npages = used_len >> TARGET_PAGE_BITS;
2772
2773 qemu_mutex_lock(&ram_state->bitmap_mutex);
2774
2775
2776
2777
2778
2779
2780 migration_clear_memory_region_dirty_bitmap_range(ram_state, block,
2781 start, npages);
2782 ram_state->migration_dirty_pages -=
2783 bitmap_count_one_with_offset(block->bmap, start, npages);
2784 bitmap_clear(block->bmap, start, npages);
2785 qemu_mutex_unlock(&ram_state->bitmap_mutex);
2786 }
2787}
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804static int ram_save_setup(QEMUFile *f, void *opaque)
2805{
2806 RAMState **rsp = opaque;
2807 RAMBlock *block;
2808
2809 if (compress_threads_save_setup()) {
2810 return -1;
2811 }
2812
2813
2814 if (!migration_in_colo_state()) {
2815 if (ram_init_all(rsp) != 0) {
2816 compress_threads_save_cleanup();
2817 return -1;
2818 }
2819 }
2820 (*rsp)->f = f;
2821
2822 WITH_RCU_READ_LOCK_GUARD() {
2823 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
2824
2825 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2826 qemu_put_byte(f, strlen(block->idstr));
2827 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2828 qemu_put_be64(f, block->used_length);
2829 if (migrate_postcopy_ram() && block->page_size !=
2830 qemu_host_page_size) {
2831 qemu_put_be64(f, block->page_size);
2832 }
2833 if (migrate_ignore_shared()) {
2834 qemu_put_be64(f, block->mr->addr);
2835 }
2836 }
2837 }
2838
2839 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2840 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2841
2842 multifd_send_sync_main(f);
2843 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2844 qemu_fflush(f);
2845
2846 return 0;
2847}
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857static int ram_save_iterate(QEMUFile *f, void *opaque)
2858{
2859 RAMState **temp = opaque;
2860 RAMState *rs = *temp;
2861 int ret = 0;
2862 int i;
2863 int64_t t0;
2864 int done = 0;
2865
2866 if (blk_mig_bulk_active()) {
2867
2868
2869
2870 goto out;
2871 }
2872
2873
2874
2875
2876
2877
2878
2879
2880 qemu_mutex_lock(&rs->bitmap_mutex);
2881 WITH_RCU_READ_LOCK_GUARD() {
2882 if (ram_list.version != rs->last_version) {
2883 ram_state_reset(rs);
2884 }
2885
2886
2887 smp_rmb();
2888
2889 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2890
2891 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2892 i = 0;
2893 while ((ret = qemu_file_rate_limit(f)) == 0 ||
2894 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2895 int pages;
2896
2897 if (qemu_file_get_error(f)) {
2898 break;
2899 }
2900
2901 pages = ram_find_and_save_block(rs, false);
2902
2903 if (pages == 0) {
2904 done = 1;
2905 break;
2906 }
2907
2908 if (pages < 0) {
2909 qemu_file_set_error(f, pages);
2910 break;
2911 }
2912
2913 rs->target_page_count += pages;
2914
2915
2916
2917
2918
2919 if (migrate_postcopy_ram()) {
2920 flush_compressed_data(rs);
2921 }
2922
2923
2924
2925
2926
2927
2928
2929 if ((i & 63) == 0) {
2930 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
2931 1000000;
2932 if (t1 > MAX_WAIT) {
2933 trace_ram_save_iterate_big_wait(t1, i);
2934 break;
2935 }
2936 }
2937 i++;
2938 }
2939 }
2940 qemu_mutex_unlock(&rs->bitmap_mutex);
2941
2942
2943
2944
2945
2946 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2947
2948out:
2949 if (ret >= 0
2950 && migration_is_setup_or_active(migrate_get_current()->state)) {
2951 multifd_send_sync_main(rs->f);
2952 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2953 qemu_fflush(f);
2954 ram_counters.transferred += 8;
2955
2956 ret = qemu_file_get_error(f);
2957 }
2958 if (ret < 0) {
2959 return ret;
2960 }
2961
2962 return done;
2963}
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975static int ram_save_complete(QEMUFile *f, void *opaque)
2976{
2977 RAMState **temp = opaque;
2978 RAMState *rs = *temp;
2979 int ret = 0;
2980
2981 WITH_RCU_READ_LOCK_GUARD() {
2982 if (!migration_in_postcopy()) {
2983 migration_bitmap_sync_precopy(rs);
2984 }
2985
2986 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2987
2988
2989
2990
2991 while (true) {
2992 int pages;
2993
2994 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2995
2996 if (pages == 0) {
2997 break;
2998 }
2999 if (pages < 0) {
3000 ret = pages;
3001 break;
3002 }
3003 }
3004
3005 flush_compressed_data(rs);
3006 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
3007 }
3008
3009 if (ret >= 0) {
3010 multifd_send_sync_main(rs->f);
3011 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3012 qemu_fflush(f);
3013 }
3014
3015 return ret;
3016}
3017
3018static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
3019 uint64_t *res_precopy_only,
3020 uint64_t *res_compatible,
3021 uint64_t *res_postcopy_only)
3022{
3023 RAMState **temp = opaque;
3024 RAMState *rs = *temp;
3025 uint64_t remaining_size;
3026
3027 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3028
3029 if (!migration_in_postcopy() &&
3030 remaining_size < max_size) {
3031 qemu_mutex_lock_iothread();
3032 WITH_RCU_READ_LOCK_GUARD() {
3033 migration_bitmap_sync_precopy(rs);
3034 }
3035 qemu_mutex_unlock_iothread();
3036 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3037 }
3038
3039 if (migrate_postcopy_ram()) {
3040
3041 *res_compatible += remaining_size;
3042 } else {
3043 *res_precopy_only += remaining_size;
3044 }
3045}
3046
3047static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3048{
3049 unsigned int xh_len;
3050 int xh_flags;
3051 uint8_t *loaded_data;
3052
3053
3054 xh_flags = qemu_get_byte(f);
3055 xh_len = qemu_get_be16(f);
3056
3057 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3058 error_report("Failed to load XBZRLE page - wrong compression!");
3059 return -1;
3060 }
3061
3062 if (xh_len > TARGET_PAGE_SIZE) {
3063 error_report("Failed to load XBZRLE page - len overflow!");
3064 return -1;
3065 }
3066 loaded_data = XBZRLE.decoded_buf;
3067
3068
3069 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
3070
3071
3072 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
3073 TARGET_PAGE_SIZE) == -1) {
3074 error_report("Failed to load XBZRLE page - decode error!");
3075 return -1;
3076 }
3077
3078 return 0;
3079}
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
3092{
3093 static RAMBlock *block;
3094 char id[256];
3095 uint8_t len;
3096
3097 if (flags & RAM_SAVE_FLAG_CONTINUE) {
3098 if (!block) {
3099 error_report("Ack, bad migration stream!");
3100 return NULL;
3101 }
3102 return block;
3103 }
3104
3105 len = qemu_get_byte(f);
3106 qemu_get_buffer(f, (uint8_t *)id, len);
3107 id[len] = 0;
3108
3109 block = qemu_ram_block_by_name(id);
3110 if (!block) {
3111 error_report("Can't find block %s", id);
3112 return NULL;
3113 }
3114
3115 if (ramblock_is_ignored(block)) {
3116 error_report("block %s should not be migrated !", id);
3117 return NULL;
3118 }
3119
3120 return block;
3121}
3122
3123static inline void *host_from_ram_block_offset(RAMBlock *block,
3124 ram_addr_t offset)
3125{
3126 if (!offset_in_ramblock(block, offset)) {
3127 return NULL;
3128 }
3129
3130 return block->host + offset;
3131}
3132
3133static void *host_page_from_ram_block_offset(RAMBlock *block,
3134 ram_addr_t offset)
3135{
3136
3137 return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
3138 block->page_size);
3139}
3140
3141static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
3142 ram_addr_t offset)
3143{
3144 return ((uintptr_t)block->host + offset) & (block->page_size - 1);
3145}
3146
3147static inline void *colo_cache_from_block_offset(RAMBlock *block,
3148 ram_addr_t offset, bool record_bitmap)
3149{
3150 if (!offset_in_ramblock(block, offset)) {
3151 return NULL;
3152 }
3153 if (!block->colo_cache) {
3154 error_report("%s: colo_cache is NULL in block :%s",
3155 __func__, block->idstr);
3156 return NULL;
3157 }
3158
3159
3160
3161
3162
3163
3164 if (record_bitmap &&
3165 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3166 ram_state->migration_dirty_pages++;
3167 }
3168 return block->colo_cache + offset;
3169}
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
/**
 * ram_handle_compressed: fill a range of memory with a single byte value
 *
 * The write is skipped when @ch is zero and the range already reads as all
 * zeroes, so such memory is left untouched.
 *
 * @host: host address of the range to fill
 * @ch: byte value to fill with
 * @size: size of the range in bytes
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        return;
    }
    memset(host, ch, size);
}
3187
3188
3189static int
3190qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3191 const uint8_t *source, size_t source_len)
3192{
3193 int err;
3194
3195 err = inflateReset(stream);
3196 if (err != Z_OK) {
3197 return -1;
3198 }
3199
3200 stream->avail_in = source_len;
3201 stream->next_in = (uint8_t *)source;
3202 stream->avail_out = dest_len;
3203 stream->next_out = dest;
3204
3205 err = inflate(stream, Z_NO_FLUSH);
3206 if (err != Z_STREAM_END) {
3207 return -1;
3208 }
3209
3210 return stream->total_out;
3211}
3212
3213static void *do_data_decompress(void *opaque)
3214{
3215 DecompressParam *param = opaque;
3216 unsigned long pagesize;
3217 uint8_t *des;
3218 int len, ret;
3219
3220 qemu_mutex_lock(¶m->mutex);
3221 while (!param->quit) {
3222 if (param->des) {
3223 des = param->des;
3224 len = param->len;
3225 param->des = 0;
3226 qemu_mutex_unlock(¶m->mutex);
3227
3228 pagesize = TARGET_PAGE_SIZE;
3229
3230 ret = qemu_uncompress_data(¶m->stream, des, pagesize,
3231 param->compbuf, len);
3232 if (ret < 0 && migrate_get_current()->decompress_error_check) {
3233 error_report("decompress data failed");
3234 qemu_file_set_error(decomp_file, ret);
3235 }
3236
3237 qemu_mutex_lock(&decomp_done_lock);
3238 param->done = true;
3239 qemu_cond_signal(&decomp_done_cond);
3240 qemu_mutex_unlock(&decomp_done_lock);
3241
3242 qemu_mutex_lock(¶m->mutex);
3243 } else {
3244 qemu_cond_wait(¶m->cond, ¶m->mutex);
3245 }
3246 }
3247 qemu_mutex_unlock(¶m->mutex);
3248
3249 return NULL;
3250}
3251
3252static int wait_for_decompress_done(void)
3253{
3254 int idx, thread_count;
3255
3256 if (!migrate_use_compression()) {
3257 return 0;
3258 }
3259
3260 thread_count = migrate_decompress_threads();
3261 qemu_mutex_lock(&decomp_done_lock);
3262 for (idx = 0; idx < thread_count; idx++) {
3263 while (!decomp_param[idx].done) {
3264 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3265 }
3266 }
3267 qemu_mutex_unlock(&decomp_done_lock);
3268 return qemu_file_get_error(decomp_file);
3269}
3270
3271static void compress_threads_load_cleanup(void)
3272{
3273 int i, thread_count;
3274
3275 if (!migrate_use_compression()) {
3276 return;
3277 }
3278 thread_count = migrate_decompress_threads();
3279 for (i = 0; i < thread_count; i++) {
3280
3281
3282
3283
3284 if (!decomp_param[i].compbuf) {
3285 break;
3286 }
3287
3288 qemu_mutex_lock(&decomp_param[i].mutex);
3289 decomp_param[i].quit = true;
3290 qemu_cond_signal(&decomp_param[i].cond);
3291 qemu_mutex_unlock(&decomp_param[i].mutex);
3292 }
3293 for (i = 0; i < thread_count; i++) {
3294 if (!decomp_param[i].compbuf) {
3295 break;
3296 }
3297
3298 qemu_thread_join(decompress_threads + i);
3299 qemu_mutex_destroy(&decomp_param[i].mutex);
3300 qemu_cond_destroy(&decomp_param[i].cond);
3301 inflateEnd(&decomp_param[i].stream);
3302 g_free(decomp_param[i].compbuf);
3303 decomp_param[i].compbuf = NULL;
3304 }
3305 g_free(decompress_threads);
3306 g_free(decomp_param);
3307 decompress_threads = NULL;
3308 decomp_param = NULL;
3309 decomp_file = NULL;
3310}
3311
3312static int compress_threads_load_setup(QEMUFile *f)
3313{
3314 int i, thread_count;
3315
3316 if (!migrate_use_compression()) {
3317 return 0;
3318 }
3319
3320 thread_count = migrate_decompress_threads();
3321 decompress_threads = g_new0(QemuThread, thread_count);
3322 decomp_param = g_new0(DecompressParam, thread_count);
3323 qemu_mutex_init(&decomp_done_lock);
3324 qemu_cond_init(&decomp_done_cond);
3325 decomp_file = f;
3326 for (i = 0; i < thread_count; i++) {
3327 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3328 goto exit;
3329 }
3330
3331 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3332 qemu_mutex_init(&decomp_param[i].mutex);
3333 qemu_cond_init(&decomp_param[i].cond);
3334 decomp_param[i].done = true;
3335 decomp_param[i].quit = false;
3336 qemu_thread_create(decompress_threads + i, "decompress",
3337 do_data_decompress, decomp_param + i,
3338 QEMU_THREAD_JOINABLE);
3339 }
3340 return 0;
3341exit:
3342 compress_threads_load_cleanup();
3343 return -1;
3344}
3345
3346static void decompress_data_with_multi_threads(QEMUFile *f,
3347 void *host, int len)
3348{
3349 int idx, thread_count;
3350
3351 thread_count = migrate_decompress_threads();
3352 QEMU_LOCK_GUARD(&decomp_done_lock);
3353 while (true) {
3354 for (idx = 0; idx < thread_count; idx++) {
3355 if (decomp_param[idx].done) {
3356 decomp_param[idx].done = false;
3357 qemu_mutex_lock(&decomp_param[idx].mutex);
3358 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
3359 decomp_param[idx].des = host;
3360 decomp_param[idx].len = len;
3361 qemu_cond_signal(&decomp_param[idx].cond);
3362 qemu_mutex_unlock(&decomp_param[idx].mutex);
3363 break;
3364 }
3365 }
3366 if (idx < thread_count) {
3367 break;
3368 } else {
3369 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3370 }
3371 }
3372}
3373
/* Initialise the global ram_state for COLO by calling ram_state_init(). */
static void colo_init_ram_state(void)
{
    ram_state_init(&ram_state);
}
3378
3379
3380
3381
3382
3383
3384int colo_init_ram_cache(void)
3385{
3386 RAMBlock *block;
3387
3388 WITH_RCU_READ_LOCK_GUARD() {
3389 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3390 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3391 NULL, false, false);
3392 if (!block->colo_cache) {
3393 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3394 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3395 block->used_length);
3396 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3397 if (block->colo_cache) {
3398 qemu_anon_ram_free(block->colo_cache, block->used_length);
3399 block->colo_cache = NULL;
3400 }
3401 }
3402 return -errno;
3403 }
3404 }
3405 }
3406
3407
3408
3409
3410
3411
3412 if (ram_bytes_total()) {
3413 RAMBlock *block;
3414
3415 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3416 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3417 block->bmap = bitmap_new(pages);
3418 }
3419 }
3420
3421 colo_init_ram_state();
3422 return 0;
3423}
3424
3425
/*
 * Start dirty logging on the incoming COLO side.
 *
 * With the iothread and ramlist locks held: sync the global dirty log, sync
 * and then zero each non-ignored block's bitmap, start global dirty logging,
 * and reset ram_state->migration_dirty_pages to 0.
 */
void colo_incoming_start_dirty_log(void)
{
    RAMBlock *block = NULL;
    /* Locks are released in the reverse order at the end of the function */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
            /* Drop whatever the sync just recorded for this block */
            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
        }
        memory_global_dirty_log_start();
    }
    ram_state->migration_dirty_pages = 0;
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
3446
3447
/*
 * Undo colo_init_ram_cache(): stop global dirty logging, free each
 * non-ignored block's bitmap and COLO cache, and clean up ram_state.
 */
void colo_release_ram_cache(void)
{
    RAMBlock *block;

    memory_global_dirty_log_stop();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->bmap);
        block->bmap = NULL;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            /* Blocks whose cache was never allocated are skipped */
            if (block->colo_cache) {
                qemu_anon_ram_free(block->colo_cache, block->used_length);
                block->colo_cache = NULL;
            }
        }
    }
    ram_state_cleanup(&ram_state);
}
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477static int ram_load_setup(QEMUFile *f, void *opaque)
3478{
3479 if (compress_threads_load_setup(f)) {
3480 return -1;
3481 }
3482
3483 xbzrle_load_setup();
3484 ramblock_recv_map_init();
3485
3486 return 0;
3487}
3488
/*
 * ram_load_cleanup: release what ram_load_setup() and loading created
 *
 * Writes back every non-ignored block, tears down XBZRLE and decompression
 * state, and frees each block's receivedmap.  Always returns 0.
 *
 * @opaque: unused
 */
static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        qemu_ram_block_writeback(rb);
    }

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }

    return 0;
}
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
/**
 * ram_postcopy_incoming_init: RAM-side entry point for incoming postcopy init
 *
 * Forwards to postcopy_ram_incoming_init() and returns its result unchanged.
 *
 * @mis: current migration incoming state
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    return postcopy_ram_incoming_init(mis);
}
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
/**
 * ram_load_postcopy: load page records while postcopy is running
 *
 * Reads records until RAM_SAVE_FLAG_EOS or an error.  Target pages are
 * staged in mis->postcopy_tmp_page; once as many target pages as make up one
 * page of the block (block->page_size) have arrived, the page is handed to
 * postcopy_place_page() or, if every component was zero, to
 * postcopy_place_page_zero().
 *
 * Returns 0 on success, a non-zero error value otherwise.
 *
 * @f: QEMUFile to read the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matches_target_page_size = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Staging page that is later passed to postcopy_place_page() */
    void *postcopy_host_page = mis->postcopy_tmp_page;
    void *host_page = NULL;
    bool all_zero = true;
    int target_pages = 0;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;
        int len;

        addr = qemu_get_be64(f);

        /*
         * Check the stream for errors before interpreting the header: the
         * value just read is only meaningful if the read succeeded.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        /* The low bits of the header carry the flags, the rest the address */
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(f, flags);
            if (!block) {
                ret = -EINVAL;
                break;
            }

            /*
             * The offset is validated against postcopy_length rather than
             * used_length.
             */
            if (!block->host || addr >= block->postcopy_length) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            target_pages++;
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Data is first collected in the staging page, at the same
             * in-page offset this target page has within its host page;
             * the whole page is placed later in one go.
             */
            page_buffer = postcopy_host_page +
                          host_page_offset_from_ram_block_offset(block, addr);
            /* The first target page of a host page fixes the destination */
            if (target_pages == 1) {
                host_page = host_page_from_ram_block_offset(block, addr);
            } else if (host_page != host_page_from_ram_block_offset(block,
                                                                    addr)) {
                /* All components must belong to the same host page */
                error_report("Non-same host page %p/%p", host_page,
                             host_page_from_ram_block_offset(block, addr));
                ret = -EINVAL;
                break;
            }

            /*
             * Once the expected number of target pages has been seen, the
             * page is complete and gets placed at the end of this iteration.
             */
            if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
                place_needed = true;
            }
            place_source = postcopy_host_page;
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            /*
             * A zero target page in a block whose page size equals the
             * target page size needs no staging write: it is placed through
             * the all_zero path below.
             */
            if (ch || !matches_target_page_size) {
                memset(page_buffer, ch, TARGET_PAGE_SIZE);
            }
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!matches_target_page_size) {
                /* Part of a larger page: copy into the staging page */
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * place_source is passed by address and may be changed by
                 * this call.
                 * NOTE(review): presumably it is redirected to the
                 * QEMUFile's own buffer to avoid a copy, which would mean no
                 * further QEMUFile reads may happen before the page is
                 * placed -- confirm against qemu_get_buffer_in_place().
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            all_zero = false;
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, page_buffer, len);
            break;

        case RAM_SAVE_FLAG_EOS:
            /* End of section: synchronise with multifd receive */
            multifd_recv_sync_main();
            break;
        default:
            error_report("Unknown combination of migration flags: 0x%x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

        /* All decompression must have finished before the page is placed */
        if (place_needed) {
            ret |= wait_for_decompress_done();
        }

        /* Pick up any read error from the QEMUFile operations above */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            if (all_zero) {
                ret = postcopy_place_page_zero(mis, host_page, block);
            } else {
                ret = postcopy_place_page(mis, host_page, place_source,
                                          block);
            }
            place_needed = false;
            target_pages = 0;
            /* Start accumulating the next host page from scratch */
            all_zero = true;
        }
    }

    return ret;
}
3705
3706static bool postcopy_is_advised(void)
3707{
3708 PostcopyState ps = postcopy_state_get();
3709 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3710}
3711
3712static bool postcopy_is_running(void)
3713{
3714 PostcopyState ps = postcopy_state_get();
3715 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3716}
3717
3718
3719
3720
3721
3722void colo_flush_ram_cache(void)
3723{
3724 RAMBlock *block = NULL;
3725 void *dst_host;
3726 void *src_host;
3727 unsigned long offset = 0;
3728
3729 memory_global_dirty_log_sync();
3730 qemu_mutex_lock(&ram_state->bitmap_mutex);
3731 WITH_RCU_READ_LOCK_GUARD() {
3732 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3733 ramblock_sync_dirty_bitmap(ram_state, block);
3734 }
3735 }
3736
3737 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
3738 WITH_RCU_READ_LOCK_GUARD() {
3739 block = QLIST_FIRST_RCU(&ram_list.blocks);
3740
3741 while (block) {
3742 offset = migration_bitmap_find_dirty(ram_state, block, offset);
3743
3744 if (!offset_in_ramblock(block,
3745 ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
3746 offset = 0;
3747 block = QLIST_NEXT_RCU(block, next);
3748 } else {
3749 migration_bitmap_clear_dirty(ram_state, block, offset);
3750 dst_host = block->host
3751 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3752 src_host = block->colo_cache
3753 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3754 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
3755 }
3756 }
3757 }
3758 trace_colo_flush_ram_cache_end();
3759 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3760}
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
/**
 * ram_load_precopy: load page records in the precopy phase
 *
 * Reads records until RAM_SAVE_FLAG_EOS or an error, dispatching on the
 * flags in each record header (block list, zero page, raw page, compressed
 * page, XBZRLE page, hook).  Before returning it waits for all outstanding
 * decompression to finish.
 *
 * Returns 0 on success, a non-zero error value otherwise.
 *
 * @f: QEMUFile to read the data from
 */
static int ram_load_precopy(QEMUFile *f)
{
    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
    /* Sampled once up front; used when parsing the block list below */
    bool postcopy_advised = postcopy_is_advised();
    /* Compressed pages are rejected unless compression is enabled locally */
    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL, *host_bak = NULL;
        uint8_t ch;

        /*
         * When running in a coroutine, reschedule and yield once every
         * 32768 iterations so that other work can run.
         */
        if ((i & 32767) == 0 && qemu_in_coroutine()) {
            aio_co_schedule(qemu_get_current_aio_context(),
                            qemu_coroutine_self());
            qemu_coroutine_yield();
        }
        i++;

        /* The low bits of the header carry the flags, the rest the address */
        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            /*
             * With incoming COLO enabled there are two cases:
             *  - already in COLO state: the page is loaded into the COLO
             *    cache instead of guest memory, and the page is recorded in
             *    the block's bitmap;
             *  - not yet in COLO state: the page is loaded into guest memory
             *    and additionally copied into the COLO cache (host_bak) at
             *    the end of this iteration.
             */
            if (migration_incoming_colo_enabled()) {
                if (migration_incoming_in_colo_state()) {
                    /* Redirect the load into the cache */
                    host = colo_cache_from_block_offset(block, addr, true);
                } else {
                    /* Keep loading into RAM; remember the cache copy target */
                    host_bak = colo_cache_from_block_offset(block, addr, false);
                }
            }
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (!migration_incoming_in_colo_state()) {
                ramblock_recv_bitmap_set(block, host);
            }

            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /*
             * Block list: addr holds the total byte count; each entry's
             * length is subtracted until nothing is left.
             */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated !", id);
                    ret = -EINVAL;
                } else if (block) {
                    /* Resize the local block to the length in the stream */
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /*
                     * An extra page-size field is read only for blocks whose
                     * page size differs from the host page size, and only
                     * when postcopy was advised and postcopy RAM is enabled.
                     */
                    if (postcopy_advised && migrate_postcopy_ram() &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    /* With ignore-shared, each entry also carries an address */
                    if (migrate_ignore_shared()) {
                        hwaddr addr = qemu_get_be64(f);
                        if (ramblock_is_ignored(block) &&
                            block->mr->addr != addr) {
                            error_report("Mismatched GPAs for block %s "
                                         "%" PRId64 "!= %" PRId64,
                                         id, (uint64_t)addr,
                                         (uint64_t)block->mr->addr);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* End of section: synchronise with multifd receive */
            multifd_recv_sync_main();
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: 0x%x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
        /* Mirror the freshly loaded page into the COLO cache if requested */
        if (!ret && host_bak) {
            memcpy(host_bak, host, TARGET_PAGE_SIZE);
        }
    }

    ret |= wait_for_decompress_done();
    return ret;
}
3964
3965static int ram_load(QEMUFile *f, void *opaque, int version_id)
3966{
3967 int ret = 0;
3968 static uint64_t seq_iter;
3969
3970
3971
3972
3973 bool postcopy_running = postcopy_is_running();
3974
3975 seq_iter++;
3976
3977 if (version_id != 4) {
3978 return -EINVAL;
3979 }
3980
3981
3982
3983
3984
3985
3986
3987 WITH_RCU_READ_LOCK_GUARD() {
3988 if (postcopy_running) {
3989 ret = ram_load_postcopy(f);
3990 } else {
3991 ret = ram_load_precopy(f);
3992 }
3993 }
3994 trace_ram_load_complete(ret, seq_iter);
3995
3996 return ret;
3997}
3998
3999static bool ram_has_postcopy(void *opaque)
4000{
4001 RAMBlock *rb;
4002 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
4003 if (ramblock_is_pmem(rb)) {
4004 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4005 "is not supported now!", rb->idstr, rb->host);
4006 return false;
4007 }
4008 }
4009
4010 return migrate_postcopy_ram();
4011}
4012
4013
4014static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4015{
4016 RAMBlock *block;
4017 QEMUFile *file = s->to_dst_file;
4018 int ramblock_count = 0;
4019
4020 trace_ram_dirty_bitmap_sync_start();
4021
4022 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4023 qemu_savevm_send_recv_bitmap(file, block->idstr);
4024 trace_ram_dirty_bitmap_request(block->idstr);
4025 ramblock_count++;
4026 }
4027
4028 trace_ram_dirty_bitmap_sync_wait();
4029
4030
4031 while (ramblock_count--) {
4032 qemu_sem_wait(&s->rp_state.rp_sem);
4033 }
4034
4035 trace_ram_dirty_bitmap_sync_complete();
4036
4037 return 0;
4038}
4039
/*
 * Post rp_state.rp_sem once; ram_dirty_bitmap_sync_all() waits on this
 * semaphore once per bitmap it requested.
 */
static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}
4044
4045
4046
4047
4048
4049
4050int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4051{
4052 int ret = -EINVAL;
4053
4054 QEMUFile *file = s->rp_state.from_dst_file;
4055 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
4056 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
4057 uint64_t size, end_mark;
4058
4059 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4060
4061 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4062 error_report("%s: incorrect state %s", __func__,
4063 MigrationStatus_str(s->state));
4064 return -EINVAL;
4065 }
4066
4067
4068
4069
4070
4071 local_size = ROUND_UP(local_size, 8);
4072
4073
4074 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4075
4076 size = qemu_get_be64(file);
4077
4078
4079 if (size != local_size) {
4080 error_report("%s: ramblock '%s' bitmap size mismatch "
4081 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4082 block->idstr, size, local_size);
4083 ret = -EINVAL;
4084 goto out;
4085 }
4086
4087 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4088 end_mark = qemu_get_be64(file);
4089
4090 ret = qemu_file_get_error(file);
4091 if (ret || size != local_size) {
4092 error_report("%s: read bitmap failed for ramblock '%s': %d"
4093 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4094 __func__, block->idstr, ret, local_size, size);
4095 ret = -EIO;
4096 goto out;
4097 }
4098
4099 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4100 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
4101 __func__, block->idstr, end_mark);
4102 ret = -EINVAL;
4103 goto out;
4104 }
4105
4106
4107
4108
4109
4110 bitmap_from_le(block->bmap, le_bitmap, nbits);
4111
4112
4113
4114
4115
4116 bitmap_complement(block->bmap, block->bmap, nbits);
4117
4118 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4119
4120
4121
4122
4123
4124 ram_dirty_bitmap_reload_notify(s);
4125
4126 ret = 0;
4127out:
4128 g_free(le_bitmap);
4129 return ret;
4130}
4131
4132static int ram_resume_prepare(MigrationState *s, void *opaque)
4133{
4134 RAMState *rs = *(RAMState **)opaque;
4135 int ret;
4136
4137 ret = ram_dirty_bitmap_sync_all(s, rs);
4138 if (ret) {
4139 return ret;
4140 }
4141
4142 ram_state_resume_prepare(rs, s->to_dst_file);
4143
4144 return 0;
4145}
4146
/*
 * Save/load callbacks for the "ram" section; registered by ram_mig_init().
 * ram_save_complete serves as both the precopy and the postcopy completion
 * handler.
 */
static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
    .resume_prepare = ram_resume_prepare,
};
4160
4161static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
4162 size_t old_size, size_t new_size)
4163{
4164 PostcopyState ps = postcopy_state_get();
4165 ram_addr_t offset;
4166 RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
4167 Error *err = NULL;
4168
4169 if (ramblock_is_ignored(rb)) {
4170 return;
4171 }
4172
4173 if (!migration_is_idle()) {
4174
4175
4176
4177
4178
4179
4180 error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
4181 migrate_set_error(migrate_get_current(), err);
4182 error_free(err);
4183 migration_cancel();
4184 }
4185
4186 switch (ps) {
4187 case POSTCOPY_INCOMING_ADVISE:
4188
4189
4190
4191
4192
4193 if (old_size < new_size) {
4194 if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
4195 error_report("RAM block '%s' discard of resized RAM failed",
4196 rb->idstr);
4197 }
4198 }
4199 rb->postcopy_length = new_size;
4200 break;
4201 case POSTCOPY_INCOMING_NONE:
4202 case POSTCOPY_INCOMING_RUNNING:
4203 case POSTCOPY_INCOMING_END:
4204
4205
4206
4207
4208
4209 break;
4210 default:
4211 error_report("RAM block '%s' resized during postcopy state: %d",
4212 rb->idstr, ps);
4213 exit(-1);
4214 }
4215}
4216
/* RAM block notifier with only the resize callback set; added by ram_mig_init() */
static RAMBlockNotifier ram_mig_ram_notifier = {
    .ram_block_resized = ram_mig_ram_block_resized,
};
4220
/*
 * One-time initialisation of RAM migration: initialise the XBZRLE lock,
 * register the "ram" live section (version 4) with ram_state as its opaque,
 * and add the RAM block resize notifier.
 */
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
    ram_block_notifier_add(&ram_mig_ram_notifier);
}
4227