1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include "qemu/osdep.h"
30#include "cpu.h"
31#include "qemu/cutils.h"
32#include "qemu/bitops.h"
33#include "qemu/bitmap.h"
34#include "qemu/main-loop.h"
35#include "xbzrle.h"
36#include "ram.h"
37#include "migration.h"
38#include "migration/register.h"
39#include "migration/misc.h"
40#include "qemu-file.h"
41#include "postcopy-ram.h"
42#include "page_cache.h"
43#include "qemu/error-report.h"
44#include "qapi/error.h"
45#include "qapi/qapi-types-migration.h"
46#include "qapi/qapi-events-migration.h"
47#include "qapi/qmp/qerror.h"
48#include "trace.h"
49#include "exec/ram_addr.h"
50#include "exec/target_page.h"
51#include "qemu/rcu_queue.h"
52#include "migration/colo.h"
53#include "block.h"
54#include "sysemu/sysemu.h"
55#include "sysemu/cpu-throttle.h"
56#include "savevm.h"
57#include "qemu/iov.h"
58#include "multifd.h"
59#include "sysemu/runstate.h"
60
61#if defined(__linux__)
62#include "qemu/userfaultfd.h"
63#endif
64
65
66
67
68
69
70
71
72
73
74#define RAM_SAVE_FLAG_FULL 0x01
75#define RAM_SAVE_FLAG_ZERO 0x02
76#define RAM_SAVE_FLAG_MEM_SIZE 0x04
77#define RAM_SAVE_FLAG_PAGE 0x08
78#define RAM_SAVE_FLAG_EOS 0x10
79#define RAM_SAVE_FLAG_CONTINUE 0x20
80#define RAM_SAVE_FLAG_XBZRLE 0x40
81
82#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
83
84static inline bool is_zero_range(uint8_t *p, uint64_t size)
85{
86 return buffer_is_zero(p, size);
87}
88
89XBZRLECacheStats xbzrle_counters;
90
91
92
/*
 * State for XBZRLE delta compression of migrated pages.  All fields are
 * protected by @lock, taken via XBZRLE_cache_lock()/XBZRLE_cache_unlock().
 */
static struct {
    /* buffer that receives the XBZRLE-encoded form of a page */
    uint8_t *encoded_buf;
    /* scratch copy of the page contents being encoded */
    uint8_t *current_buf;
    /* cache of previously-sent page contents, keyed by address */
    PageCache *cache;
    QemuMutex lock;
    /* a page full of zeros, inserted into the cache for zero pages */
    uint8_t *zero_target_page;
    /* buffer used on the destination side for decoding */
    uint8_t *decoded_buf;
} XBZRLE;
106
107static void XBZRLE_cache_lock(void)
108{
109 if (migrate_use_xbzrle()) {
110 qemu_mutex_lock(&XBZRLE.lock);
111 }
112}
113
114static void XBZRLE_cache_unlock(void)
115{
116 if (migrate_use_xbzrle()) {
117 qemu_mutex_unlock(&XBZRLE.lock);
118 }
119}
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134int xbzrle_cache_resize(uint64_t new_size, Error **errp)
135{
136 PageCache *new_cache;
137 int64_t ret = 0;
138
139
140 if (new_size != (size_t)new_size) {
141 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
142 "exceeding address space");
143 return -1;
144 }
145
146 if (new_size == migrate_xbzrle_cache_size()) {
147
148 return 0;
149 }
150
151 XBZRLE_cache_lock();
152
153 if (XBZRLE.cache != NULL) {
154 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
155 if (!new_cache) {
156 ret = -1;
157 goto out;
158 }
159
160 cache_fini(XBZRLE.cache);
161 XBZRLE.cache = new_cache;
162 }
163out:
164 XBZRLE_cache_unlock();
165 return ret;
166}
167
168bool ramblock_is_ignored(RAMBlock *block)
169{
170 return !qemu_ram_is_migratable(block) ||
171 (migrate_ignore_shared() && qemu_ram_is_shared(block));
172}
173
174#undef RAMBLOCK_FOREACH
175
/*
 * Call @func(block, opaque) for every RAMBlock that is not ignored for
 * migration, under the RCU read lock.  Stops at the first non-zero return
 * from @func and propagates that value; returns 0 otherwise.
 */
int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}
191
/*
 * Allocate the per-RAMBlock "received" bitmaps (one bit per target page
 * over max_length).  Must only be called once per block: the assert
 * documents that no map may already exist.
 */
static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}
201
202int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
203{
204 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
205 rb->receivedmap);
206}
207
208bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
209{
210 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
211}
212
/* Atomically mark the page containing @host_addr as received. */
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}
217
/* Atomically mark @nr consecutive pages starting at @host_addr as received. */
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}
225
226#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
227
228
229
230
231
232
/**
 * ramblock_recv_bitmap_send: send the received bitmap of @block_name to
 * the source via @file.
 *
 * Wire format: a be64 byte count, the little-endian bitmap padded to a
 * multiple of 8 bytes, then the RAMBLOCK_RECV_BITMAP_ENDING sentinel.
 *
 * Returns the number of payload bytes sent (size field + bitmap), or a
 * negative value on error.
 *
 * @file: QEMUFile to write to
 * @block_name: name of the RAMBlock whose bitmap is sent
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Over-allocate by one long so the bitmap_to_le() conversion can
     * safely operate on whole longs even when nbits is not a multiple
     * of BITS_PER_LONG.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Convert to little endian so both sides agree on the on-wire
     * layout regardless of host endianness.
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Bytes needed to hold nbits bits */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * Pad the byte count up to an 8-byte boundary; the receiver expects
     * 64-bit aligned data.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);

    /*
     * Trailing sentinel lets the receiver detect a corrupted or
     * truncated stream.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}
289
290
291
292
293
/* One outstanding page request from the destination (postcopy). */
struct RAMSrcPageRequest {
    RAMBlock *rb;      /* block the request refers to */
    hwaddr    offset;  /* start offset within the block */
    hwaddr    len;     /* remaining length of the request in bytes */

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
301
302
/* State of the RAM migration on the source side */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* UFFD file descriptor, used in 'write-tracking' (background snapshot) */
    int uffdio_fd;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round of RAM scanning */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have seen a too-high dirty rate */
    int dirty_rate_high_cnt;

    /* --- period statistics, reset at each bitmap sync --- */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at the start of the period */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages accumulated this period */
    uint64_t num_dirty_pages_period;
    /* xbzrle cache misses at the start of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* xbzrle pages at the start of the period */
    uint64_t xbzrle_pages_prev;
    /* xbzrle encoded bytes at the start of the period */
    uint64_t xbzrle_bytes_prev;

    /* --- compression statistics for the period --- */
    /* count of times no free compress thread was available */
    uint64_t compress_thread_busy_prev;
    /* compressed size at the start of the period */
    uint64_t compressed_size_prev;
    /* compressed pages at the start of the period */
    uint64_t compress_pages_prev;

    /* total handled target pages at the start of the period */
    uint64_t target_page_count_prev;
    /* total handled target pages since the start of migration */
    uint64_t target_page_count;
    /* number of dirty bits left in the bitmaps */
    uint64_t migration_dirty_pages;
    /* Protects the dirty bitmaps and migration_dirty_pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;
359
360static RAMState *ram_state;
361
362static NotifierWithReturnList precopy_notifier_list;
363
/* Initialise the precopy notifier list; call once at startup. */
void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}
368
/* Register @n to be called at precopy notification points. */
void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}
373
/* Unregister a notifier previously added with precopy_add_notifier(). */
void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}
378
379int precopy_notify(PrecopyNotifyReason reason, Error **errp)
380{
381 PrecopyNotifyData pnd;
382 pnd.reason = reason;
383 pnd.errp = errp;
384
385 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
386}
387
388void precopy_enable_free_page_optimization(void)
389{
390 if (!ram_state) {
391 return;
392 }
393
394 ram_state->fpo_enabled = true;
395}
396
397uint64_t ram_bytes_remaining(void)
398{
399 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
400 0;
401}
402
403MigrationStats ram_counters;
404
405
/* Cursor state for the dirty-page search over all RAMBlocks. */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from, in (target) pages */
    unsigned long page;
    /* Set once we wrap around the block list */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
415
416CompressionStats compression_counters;
417
/* Per-thread state for a compression worker (see do_data_compress). */
struct CompressParam {
    bool done;           /* worker finished the current page */
    bool quit;           /* ask the worker to exit */
    bool zero_page;      /* last page compressed was all zero */
    QEMUFile *file;      /* private buffer file the worker writes into */
    QemuMutex mutex;     /* protects block/offset/quit handoff */
    QemuCond cond;       /* worker waits here for new work */
    RAMBlock *block;     /* work item: block (NULL = no work pending) */
    ram_addr_t offset;   /* work item: offset within block */

    /* internally used fields */
    z_stream stream;     /* zlib deflate stream, one per thread */
    uint8_t *originbuf;  /* private copy of the page being compressed */
};
typedef struct CompressParam CompressParam;
433
/* Per-thread state for a decompression worker on the destination side. */
struct DecompressParam {
    bool done;          /* worker finished the current buffer */
    bool quit;          /* ask the worker to exit */
    QemuMutex mutex;
    QemuCond cond;
    void *des;          /* destination host address for the page */
    uint8_t *compbuf;   /* compressed input buffer */
    int len;            /* length of compressed data in compbuf */
    z_stream stream;    /* zlib inflate stream, one per thread */
};
typedef struct DecompressParam DecompressParam;
445
446static CompressParam *comp_param;
447static QemuThread *compress_threads;
448
449
450
451
452static QemuMutex comp_done_lock;
453static QemuCond comp_done_cond;
454
455static const QEMUFileOps empty_ops = { };
456
457static QEMUFile *decomp_file;
458static DecompressParam *decomp_param;
459static QemuThread *decompress_threads;
460static QemuMutex decomp_done_lock;
461static QemuCond decomp_done_cond;
462
463static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
464 ram_addr_t offset, uint8_t *source_buf);
465
/*
 * Compression worker thread body.
 *
 * Waits on param->cond for a (block, offset) work item, compresses that
 * page into param->file, then sets param->done under comp_done_lock and
 * signals comp_done_cond so the migration thread can collect the result.
 * Exits when param->quit is set.
 */
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            /*
             * Drop the lock while compressing: the producer only hands
             * over new work after observing done == true.
             */
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
499
/*
 * Tear down the compression threads and free all per-thread state.
 * Safe to call after a partially-failed setup: threads whose ->file was
 * never created were never started, so the loop stops at the first one.
 */
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * ->file being NULL means setup never reached this slot, hence
         * no thread was created for it (or any later slot).
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
538
/*
 * Create the compression worker threads and their per-thread buffers,
 * zlib streams and private QEMUFiles.  Returns 0 on success, -1 on
 * failure (after cleaning up whatever was already allocated).
 */
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /*
         * Each worker writes into its own buffer-backed QEMUFile; the
         * migration thread later copies the data to the real stream.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;   /* idle and ready for work */
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
581
582
583
584
585
586
587
588
589
590
591
592
593
/**
 * save_page_header: write a page header onto the wire
 *
 * If this page is not in the same block as the previously-sent one, the
 * block id string is sent as well; otherwise RAM_SAVE_FLAG_CONTINUE is
 * set in the offset word and the id is omitted.
 *
 * Returns the number of bytes written.
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page, with flags in the
 *          lower bits
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
614
615
616
617
618
619
620
621
622
623
/**
 * mig_throttle_guest_down: increase CPU throttling to slow dirtying
 *
 * Starts at cpu_throttle_initial on the first call; on later calls it
 * bumps the throttle by cpu_throttle_increment (capped at
 * max_cpu_throttle).  With cpu_throttle_tailslow, the increment is
 * instead scaled down near the tail, based on the ratio of the dirty
 * threshold to the actual dirty bytes in the period.
 *
 * @bytes_dirty_period: bytes dirtied in the last period
 * @bytes_dirty_threshold: dirty-byte threshold that was exceeded
 */
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /*
             * Compute the ideal remaining CPU percentage from the dirty
             * ratio and never step past it (avoids over-throttling).
             */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                        bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}
654
655
656
657
658
659
660
661
662
663
664
665
666
/**
 * xbzrle_cache_zero_page: insert a zero page into the XBZRLE cache
 *
 * The zero page is cached so that a later small change to the page can
 * be sent as an XBZRLE delta against zeros.  Skipped during the bulk
 * stage (every page is sent anyway) or when XBZRLE is off.
 *
 * @rs: current RAM state
 * @current_addr: address of the page content to cache as zero
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}
678
679#define ENCODING_FLAG_XBZRLE 0x1
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
/**
 * save_xbzrle_page: compress and send the current page via XBZRLE
 *
 * Returns: 1  page was sent as an XBZRLE delta
 *          0  page is identical to the cached copy (nothing sent)
 *          -1 page must be sent normally (cache miss or encoding too big)
 *
 * @rs: current RAM state
 * @current_data: in/out pointer to the page contents; may be redirected
 *                to the cached copy so an async send stays stable
 * @current_addr: address of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: true if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Page is cached: encode the difference between the cached copy and
     * the current contents.  Work on a private copy (current_buf) since
     * the guest may modify the page while we encode it.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, and redirect the caller at the stable cached copy for any
     * asynchronous send.  Not done in the last stage: the cache is not
     * needed any more then.
     */
    if (!last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);

        /*
         * In the case where we couldn't insert the page into the cache
         * because it was a zero page (see xbzrle_cache_zero_page), the
         * cached copy is what we encoded against, so point at it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;

    /*
     * Like compressed_size (see update_compress_thread_counts), we
     * exclude the 8-byte offset header from the xbzrle byte counter.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}
781
782
783
784
785
786
787
788
789
790
/**
 * migration_bitmap_find_dirty: find the next dirty page in @rb from
 * @start
 *
 * Returns the page number of the next dirty page, or the block size
 * (in pages) when there is none.
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long next;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    /*
     * In the bulk stage every page is dirty, so just step to the next
     * page instead of scanning the bitmap — unless the free page
     * optimization is on, which can clear bits during the bulk stage.
     */
    if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
        next = start + 1;
    } else {
        next = find_next_bit(bitmap, size, start);
    }

    return next;
}
815
/*
 * Clear the dirty bit for @page of @rb and update the dirty page count.
 * Returns the previous value of the bit.  Caller-visible state is
 * protected by rs->bitmap_mutex (taken here via QEMU_LOCK_GUARD).
 */
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    QEMU_LOCK_GUARD(&rs->bitmap_mutex);

    /*
     * Lazy clear-dirty: the clear_bmap records chunks whose dirty state
     * has not yet been cleared in the memory-region dirty bitmap.  If
     * this page's chunk is still pending, clear the whole chunk in the
     * memory core now, before we consume the per-block bmap bit.
     */
    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
        uint8_t shift = rb->clear_bmap_shift;
        hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
        hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);

        /*
         * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... but
         * can make things easier sometimes by allowing a chunk to
         * cover at least 64 pages, so the assert documents the
         * expectation rather than enforcing new behaviour.
         */
        assert(shift >= 6);
        trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
        memory_region_clear_dirty_bitmap(rb->mr, start, size);
    }

    ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}
858
859
/*
 * Pull newly-dirtied pages for @rb from the memory core into the
 * migration bitmap and account them in the global and per-period
 * dirty-page counters.
 */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}
868
869
870
871
872
873
874
875
876
877
/**
 * ram_pagesize_summary: OR together the page sizes of all migratable
 * RAMBlocks
 *
 * Since page sizes are powers of two, the result has one bit set per
 * distinct page size in use; a single set bit means all blocks share
 * one page size.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}
889
/* Total target pages handled so far, across all transfer methods. */
uint64_t ram_get_total_transferred_pages(void)
{
    return  ram_counters.normal + ram_counters.duplicate +
                compression_counters.pages + xbzrle_counters.pages;
}
895
/*
 * Recompute the per-period rate statistics (dirty page rate, xbzrle
 * cache-miss/encoding rates, compression busy/compression rates) and
 * roll the *_prev snapshots forward.  Called from migration_bitmap_sync
 * once per period; @end_time is the current time in ms.
 */
static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}
947
/*
 * Decide whether to throttle the guest CPUs.  When auto-converge is on
 * (and the block-migration bulk phase is not running), throttle if the
 * bytes dirtied in the period exceed throttle_trigger_threshold percent
 * of the bytes we actually transferred, for two periods in a row.
 */
static void migration_trigger_throttle(RAMState *rs)
{
    MigrationState *s = migrate_get_current();
    uint64_t threshold = s->parameters.throttle_trigger_threshold;

    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;

    /* During block migration the auto-converge logic incorrectly detects
     * that ram migration makes no progress. Avoid this by disabling the
     * throttling logic during the bulk phase of block migration. */
    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
        /* The following detection logic can be refined later. For now:
           Check to see if the ratio between dirtied bytes and the approx.
           amount of bytes that just got transferred since the last time
           we were in this routine reaches the threshold. If that happens
           twice, start or increase throttling. */
        if ((bytes_dirty_period > bytes_dirty_threshold) &&
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}
976
/*
 * Synchronise the dirty bitmap from the memory core into the migration
 * bitmaps of all migratable blocks, then — at most once per second —
 * update throttling and the period statistics.
 */
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 millisecons */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}
1022
/*
 * Bitmap sync wrapper used in precopy: notifies the precopy notifiers
 * before and after the sync.  Notifier failures are only reported, not
 * propagated — the sync itself always runs.
 */
static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration,
     * so we don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is
 * not a zero page.
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(file, 0);
        len += 1;
    }
    return len;
}
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1078{
1079 int len = save_zero_page_to_file(rs, rs->f, block, offset);
1080
1081 if (len) {
1082 ram_counters.duplicate++;
1083 ram_counters.transferred += len;
1084 return 1;
1085 }
1086 return -1;
1087}
1088
/*
 * Discard @pages pages starting at @offset in block @rbname on the
 * source, returning the memory to the host.  Only done when both the
 * release-ram capability is set and we are in postcopy.
 */
static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
}
1097
1098
1099
1100
1101
1102
1103
1104
/**
 * control_save_page: try to send the page through a control transport
 * hook (ram_control_save_page, e.g. an RDMA backend — confirm against
 * the registered hooks)
 *
 * Returns true when the hook handled (or delayed) the page, false when
 * the transport does not support it and the caller must send normally.
 *
 * *pages is set to 1 when data was transmitted, left at -1 otherwise.
 *
 * @rs: current RAM state
 * @block: block that contains the page
 * @offset: offset inside the block for the page
 * @pages: out parameter, see above
 */
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1148 uint8_t *buf, bool async)
1149{
1150 ram_counters.transferred += save_page_header(rs, rs->f, block,
1151 offset | RAM_SAVE_FLAG_PAGE);
1152 if (async) {
1153 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1154 migrate_release_ram() &
1155 migration_in_postcopy());
1156 } else {
1157 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1158 }
1159 ram_counters.transferred += TARGET_PAGE_SIZE;
1160 ram_counters.normal++;
1161 return 1;
1162}
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written; -1 is only possible if XBZRLE
 * declined and the normal path also could not run (it cannot here, so
 * in practice >= 0).
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    /* XBZRLE is only attempted outside the bulk stage and postcopy */
    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
        migrate_use_xbzrle()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /*
             * Must send this page synchronously: save_xbzrle_page may
             * have pointed p at an XBZRLE-internal buffer that can be
             * reused before an async send completes.
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or cache miss */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}
1211
1212static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1213 ram_addr_t offset)
1214{
1215 if (multifd_queue_page(rs->f, block, offset) < 0) {
1216 return -1;
1217 }
1218 ram_counters.normal++;
1219
1220 return 1;
1221}
1222
/*
 * Compress one page into @f (a worker's private QEMUFile), using the
 * worker's private @stream and @source_buf.  Zero pages are sent as a
 * zero-page record instead of being compressed.
 *
 * Returns true if the page was a zero page.
 */
static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
    bool zero_page = false;
    int ret;

    if (save_zero_page_to_file(rs, f, block, offset)) {
        zero_page = true;
        goto exit;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * copy it out to a static buffer first: the guest might modify the
     * page while zlib reads it, and a changing input can make the
     * deflate stream internally inconsistent.
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
        /*
         * NOTE(review): this error path returns without reaching
         * ram_release_pages below — confirm that skipping the release
         * on failure is intended.
         */
        return false;
    }

exit:
    ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    return zero_page;
}
1255
/*
 * Account @bytes_xmit bytes flushed from a compression worker into the
 * global counters.  Zero pages were already counted as duplicates; real
 * compressed pages exclude the 8-byte page header from compressed_size.
 */
static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_counters.transferred += bytes_xmit;

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}
1270
1271static bool save_page_use_compression(RAMState *rs);
1272
/*
 * Wait for all compression workers to finish their current page, then
 * flush each worker's private buffer into the migration stream and
 * update the counters.  Must be called before the stream switches to a
 * different RAMBlock, since compressed records rely on the CONTINUE
 * header ordering.
 */
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * it's safe to fetch zero_page without holding comp_done_lock
             * as done is the only read operation whilst we hold the lock.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
1304
1305static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1306 ram_addr_t offset)
1307{
1308 param->block = block;
1309 param->offset = offset;
1310}
1311
/*
 * Dispatch one page to an idle compression worker.  If no worker is
 * idle and compress-wait-thread is set, block on comp_done_cond until
 * one finishes; otherwise return -1 so the caller sends the page
 * uncompressed.  Returns 1 when the page was handed to a worker.
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            /* Flush the worker's previous output before reusing it */
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * wait for a free thread if the user specifies
     * 'compress-wait-thread', otherwise we will post the page out in
     * the main thread as a normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns true if a dirty page is at pss->block/pss->page.
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
        >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /*
             * If memory migration starts over, we will meet a dirtied
             * page which may still exist in a compression thread's
             * buffer, so flush the compressed data to make sure the new
             * page is not overwritten by the old one in the destination.
             *
             * Also, a compressed page may cross a RAM Block boundary:
             * flushing keeps the ordering of the header/data pairs.
             */
            flush_compressed_data(rs);

            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
/**
 * unqueue_page: pop one destination page request off the queue
 *
 * Returns the block of the requested page (or NULL if none pending)
 * and stores the offset within it in *offset.  Requests longer than a
 * page are consumed one page at a time.
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    /* Cheap lock-free emptiness probe before taking the mutex */
    if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
        return NULL;
    }

    QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
            QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            /* Partial consumption: advance the request by one page */
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
            migration_consume_urgent_request();
        }
    }

    return block;
}
1442
1443#if defined(__linux__)
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
/**
 * poll_fault_page: poll the UFFD for a write-fault event (background
 * snapshot mode only)
 *
 * Returns the RAMBlock containing the faulting page and stores the
 * offset within it in *offset, or NULL when there is no pending fault
 * or background snapshot is not enabled.
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    struct uffd_msg uffd_msg;
    void *page_address;
    RAMBlock *block;
    int res;

    if (!migrate_background_snapshot()) {
        return NULL;
    }

    res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
    if (res <= 0) {
        return NULL;
    }

    page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
    block = qemu_ram_block_from_host(page_address, false, offset);
    /* Faults can only come from blocks we write-protected ourselves */
    assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
    return block;
}
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
/**
 * ram_save_release_protection: remove UFFD write protection from the
 * run of pages [start_page, pss->page] that was just saved
 *
 * Returns 0 on success, negative value on error (from
 * uffd_change_protection).
 *
 * @rs: current RAM state
 * @pss: page-search state (end of the saved run)
 * @start_page: index of the first page in the saved run
 */
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
        unsigned long start_page)
{
    int res = 0;

    /* Check if page is from UFFD-managed region. */
    if (pss->block->flags & RAM_UF_WRITEPROTECT) {
        void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
        uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;

        /* Flush async buffers before un-protect. */
        qemu_fflush(rs->f);
        /* Un-protect memory range. */
        res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
                false, false);
    }

    return res;
}
1505
1506
1507
1508
1509
/*
 * ram_write_tracking_available: check whether the kernel's userfaultfd
 * supports write-protect page faults (UFFD_FEATURE_PAGEFAULT_FLAG_WP),
 * which background snapshots require.
 */
bool ram_write_tracking_available(void)
{
    uint64_t uffd_features;
    int res;

    res = uffd_query_features(&uffd_features);
    return (res == 0 &&
            (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
}
1519
1520
1521
1522
1523
1524
/*
 * ram_write_tracking_compatible: check that every writable migratable
 * RAMBlock can actually be registered with UFFD in write-protect mode
 * and that the WRITEPROTECT ioctl is available on it.  Uses a throwaway
 * uffd file descriptor that is closed before returning.
 */
bool ram_write_tracking_compatible(void)
{
    const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
    int uffd_fd;
    RAMBlock *block;
    bool ret = false;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
    if (uffd_fd < 0) {
        return false;
    }

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        uint64_t uffd_ioctls;

        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }
        /* Try to register block memory via UFFD-IO to track writes */
        if (uffd_register_memory(uffd_fd, block->host, block->max_length,
                UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
            goto out;
        }
        if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
            goto out;
        }
    }
    ret = true;

out:
    uffd_close_fd(uffd_fd);
    return ret;
}
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572static void ram_block_populate_pages(RAMBlock *block)
1573{
1574 char *ptr = (char *) block->host;
1575
1576 for (ram_addr_t offset = 0; offset < block->used_length;
1577 offset += qemu_real_host_page_size) {
1578 char tmp = *(ptr + offset);
1579
1580
1581 asm volatile("" : "+r" (tmp));
1582 }
1583}
1584
1585
1586
1587
/*
 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
 */
void ram_write_tracking_prepare(void)
{
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /*
         * Populate pages of the RAM block before enabling write
         * protection: a write fault on a not-yet-populated page would
         * otherwise have to be resolved while protection is active.
         * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
         * UFFDIO_WRITEPROTECT_MODE_WP mode setting would fail if
         * applied to unpopulated pages.
         */
        ram_block_populate_pages(block);
    }
}
1611
1612
1613
1614
1615
1616
/*
 * ram_write_tracking_start: start UFFD-WP memory tracking
 *
 * Returns 0 for success or negative value in case of error
 */
int ram_write_tracking_start(void)
{
    int uffd_fd;
    RAMState *rs = ram_state;
    RAMBlock *block;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
    if (uffd_fd < 0) {
        return uffd_fd;
    }
    rs->uffdio_fd = uffd_fd;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /* Register block memory with UFFD to track writes */
        if (uffd_register_memory(rs->uffdio_fd, block->host,
                block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
            goto fail;
        }
        /* Apply UFFD write protection to the block memory range */
        if (uffd_change_protection(rs->uffdio_fd, block->host,
                block->max_length, true, false)) {
            goto fail;
        }
        block->flags |= RAM_UF_WRITEPROTECT;
        memory_region_ref(block->mr);

        trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
                block->host, block->max_length);
    }

    return 0;

fail:
    error_report("ram_write_tracking_start() failed: restoring initial memory state");

    /* Roll back: undo protection/registration on every block done so far. */
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /*
         * In case some memory block failed to be write-protected
         * remove protection and unregister all succeeded RAM blocks
         */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
        /* Cleanup flags and remove reference taken before registration */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    uffd_close_fd(uffd_fd);
    rs->uffdio_fd = -1;
    return -1;
}
1680
1681
1682
1683
/*
 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
 */
void ram_write_tracking_stop(void)
{
    RAMState *rs = ram_state;
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /* Remove protection and unregister all affected RAM blocks */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);

        trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
                block->host, block->max_length);

        /* Cleanup flags and remove reference taken in tracking_start() */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    /* Finally close UFFD file descriptor */
    uffd_close_fd(rs->uffdio_fd);
    rs->uffdio_fd = -1;
}
1712
1713#else
1714
1715
1716static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1717{
1718 (void) rs;
1719 (void) offset;
1720
1721 return NULL;
1722}
1723
1724static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1725 unsigned long start_page)
1726{
1727 (void) rs;
1728 (void) pss;
1729 (void) start_page;
1730
1731 return 0;
1732}
1733
bool ram_write_tracking_available(void)
{
    /* UFFD-based write tracking is only implemented for Linux hosts. */
    return false;
}
1738
bool ram_write_tracking_compatible(void)
{
    /* Must never be reached: tracking_available() already returned false. */
    assert(false);
    return false;
}
1744
1745int ram_write_tracking_start(void)
1746{
1747 assert(0);
1748 return -1;
1749}
1750
1751void ram_write_tracking_stop(void)
1752{
1753 assert(0);
1754}
1755#endif
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
/*
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns true if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                                                page);
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (!block) {
        /*
         * Poll write faults too if background snapshot is enabled; that's
         * when we have vcpus got blocked by the write protected pages.
         */
        block = poll_fault_page(rs, &offset);
    }

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;

        /*
         * This unqueued page would break the "one round" check, even is
         * really rare.
         */
        pss->complete_round = false;
    }

    return !!block;
}
1830
1831
1832
1833
1834
1835
1836
1837
1838
/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  in case that there is any page left, we drop it.
 *
 * @rs: current RAM state
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /*
     * This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    RCU_READ_LOCK_GUARD();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
}
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @rbname: Name of the RAMBLock of the request. NULL means the
 *          same that last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    RCU_READ_LOCK_GUARD();

    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            return -1;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            return -1;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start + len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        return -1;
    }

    struct RAMSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct RAMSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    /* Hold a block reference for as long as the request is queued. */
    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    migration_make_urgent_request();
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;
}
1917
1918static bool save_page_use_compression(RAMState *rs)
1919{
1920 if (!migrate_use_compression()) {
1921 return false;
1922 }
1923
1924
1925
1926
1927
1928
1929 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1930 return true;
1931 }
1932
1933 return false;
1934}
1935
1936
1937
1938
1939
1940
/*
 * try to compress the page before posting it out, return true if the page
 * has been properly handled by compression, otherwise needs other
 * paths to handle it
 */
static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    if (!save_page_use_compression(rs)) {
        return false;
    }

    /*
     * When starting the process of a new block, the first page of
     * the block should be sent out before other pages in the same
     * block, and all the pages in last block should have been sent
     * out, keeping this order is important, because the 'cont' flag
     * is used to avoid resending the block name.
     *
     * We post the fist page as normal page as compression will take
     * much CPU resource.
     */
    if (block != rs->last_sent_block) {
        flush_compressed_data(rs);
        return false;
    }

    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
        return true;
    }

    /* All compress threads were busy; account and fall back. */
    compression_counters.busy++;
    return false;
}
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
                                bool last_stage)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    int res;

    /* RDMA or other transport-specific handler may take the page. */
    if (control_save_page(rs, block, offset, &res)) {
        return res;
    }

    if (save_compress_page(rs, block, offset)) {
        return 1;
    }

    res = save_zero_page(rs, block, offset);
    if (res > 0) {
        /*
         * Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale.
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        }
        ram_release_pages(block->idstr, offset, res);
        return res;
    }

    /*
     * Do not use multifd for:
     * 1. Compression as the first page in the new block should be posted out
     *    before sending the compressed page
     * 2. In postcopy as one whole host page should be placed
     */
    if (!save_page_use_compression(rs) && migrate_use_multifd()
        && !migration_in_postcopy()) {
        return ram_save_multifd_page(rs, block, offset);
    }

    return ram_save_page(rs, pss, last_stage);
}
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at *page keep saving until the end of a host page; if the
 * host page is smaller than or equal to the target page this only
 * saves one page.
 *
 * Note that if the host page is a huge page, pss->page may be in the
 * middle of it; this function will first align it to the start of the
 * host page boundary implicitly via the do/while mask check below.
 *
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
                              bool last_stage)
{
    int tmppages, pages = 0;
    /* Host page size expressed in target pages. */
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
    unsigned long start_page = pss->page;
    int res;

    if (ramblock_is_ignored(pss->block)) {
        error_report("block %s should not be migrated !", pss->block->idstr);
        return 0;
    }

    do {
        /* Check the pages is dirty and if it is send it */
        if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
            pss->page++;
            continue;
        }

        tmppages = ram_save_target_page(rs, pss, last_stage);
        if (tmppages < 0) {
            return tmppages;
        }

        pages += tmppages;
        pss->page++;
        /* Allow rate limiting to apply between any two target pages. */
        migration_rate_limit();
    } while ((pss->page & (pagesize_bits - 1)) &&
             offset_in_ramblock(pss->block,
                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));

    /* The offset we leave with is the last one we looked at */
    pss->page--;

    res = ram_save_release_protection(rs, pss, start_page);
    return (res < 0 ? res : pages);
}
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns the number of pages written where zero means no dirty pages,
 * or negative on error
 *
 * @rs: current RAM state
 * @last_stage: if we are at the completion stage
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */
static int ram_find_and_save_block(RAMState *rs, bool last_stage)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {
        return pages;
    }

    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(rs, &pss);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(rs, &pss, &again);
        }

        if (found) {
            pages = ram_save_host_page(rs, &pss, last_stage);
        }
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;

    return pages;
}
2133
2134void acct_update_position(QEMUFile *f, size_t size, bool zero)
2135{
2136 uint64_t pages = size / TARGET_PAGE_SIZE;
2137
2138 if (zero) {
2139 ram_counters.duplicate += pages;
2140 } else {
2141 ram_counters.normal += pages;
2142 ram_counters.transferred += size;
2143 qemu_update_position(f, size);
2144 }
2145}
2146
/*
 * Sum of the used_length of all RAM blocks; with @count_ignored, blocks
 * marked as ignored for migration are included in the total as well.
 */
static uint64_t ram_bytes_total_common(bool count_ignored)
{
    RAMBlock *block;
    uint64_t total = 0;

    RCU_READ_LOCK_GUARD();

    if (count_ignored) {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            total += block->used_length;
        }
    } else {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            total += block->used_length;
        }
    }
    return total;
}
2165
/* Total bytes of migratable RAM, excluding ignored blocks. */
uint64_t ram_bytes_total(void)
{
    return ram_bytes_total_common(false);
}
2170
/* Allocate the scratch buffer used to decode incoming XBZRLE pages. */
static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
2175
/* Release the XBZRLE decode scratch buffer allocated by xbzrle_load_setup(). */
static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}
2181
2182static void ram_state_cleanup(RAMState **rsp)
2183{
2184 if (*rsp) {
2185 migration_page_queue_free(*rsp);
2186 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2187 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2188 g_free(*rsp);
2189 *rsp = NULL;
2190 }
2191}
2192
2193static void xbzrle_cleanup(void)
2194{
2195 XBZRLE_cache_lock();
2196 if (XBZRLE.cache) {
2197 cache_fini(XBZRLE.cache);
2198 g_free(XBZRLE.encoded_buf);
2199 g_free(XBZRLE.current_buf);
2200 g_free(XBZRLE.zero_target_page);
2201 XBZRLE.cache = NULL;
2202 XBZRLE.encoded_buf = NULL;
2203 XBZRLE.current_buf = NULL;
2204 XBZRLE.zero_target_page = NULL;
2205 }
2206 XBZRLE_cache_unlock();
2207}
2208
/* Save-side teardown: stop dirty logging, free bitmaps and state. */
static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* We don't use dirty log with background snapshots */
    if (!migrate_background_snapshot()) {
        /*
         * caller have hold iothread lock or is in a bh, so there is
         * no writing race against the migration bitmap
         */
        memory_global_dirty_log_stop();
    }

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->clear_bmap);
        block->clear_bmap = NULL;
        g_free(block->bmap);
        block->bmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}
2233
/* Reset the dirty-page scan position and per-round state in @rs. */
static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    /* Bulk stage: assume every page dirty until the first full round. */
    rs->ram_bulk_stage = true;
    rs->fpo_enabled = false;
}
2243
2244#define MAX_WAIT 50
2245
2246
2247
2248
2249
2250
/**
 * ram_debug_dump_bitmap: dump a bitmap to stderr
 *
 * Prints 128 bits per line, but only lines that contain at least one
 * bit differing from @expected (so an all-@expected bitmap prints
 * nothing).
 *
 * @todump: bitmap to dump
 * @expected: the "uninteresting" bit value; lines of all-@expected
 *            bits are suppressed
 * @pages: number of bits in the bitmap
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining bits in the bitmap.
         * Note: linelen stays shortened for subsequent iterations,
         * which is fine since this can only happen on the last line.
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
2279
2280
2281
/**
 * ram_postcopy_migrated_memory_release: discard the memory of already
 * migrated pages (clear bits in the bitmap are pages that have been sent)
 *
 * @ms: current migration state
 */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        /* Discard each maximal run of zero (already-sent) pages. */
        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr,
                              ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
                              ((ram_addr_t)(run_end - run_start))
                                << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
/**
 * postcopy_send_discard_bm_ram: discard a RAMBlock
 *
 * Returns zero on success
 *
 * Callers should be careful about comparison to the last same block.
 *
 * @ms: current migration state
 * @block: RAMBlock to discard
 */
static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
{
    unsigned long end = block->used_length >> TARGET_PAGE_BITS;
    unsigned long current;
    unsigned long *bitmap = block->bmap;

    /* Walk the bitmap, sending one discard range per run of set bits. */
    for (current = 0; current < end; ) {
        unsigned long one = find_next_bit(bitmap, end, current);
        unsigned long zero, discard_length;

        if (one >= end) {
            break;
        }

        zero = find_next_zero_bit(bitmap, end, one + 1);

        if (zero >= end) {
            discard_length = end - one;
        } else {
            discard_length = zero - one;
        }
        postcopy_discard_send_range(ms, one, discard_length);
        current = one + discard_length;
    }

    return 0;
}
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
/**
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 *
 * Returns 0 for success or negative for error
 *
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 *
 * @ms: current migration state
 */
static int postcopy_each_ram_send_discard(MigrationState *ms)
{
    struct RAMBlock *block;
    int ret;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        postcopy_discard_send_init(ms, block->idstr);

        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */
        ret = postcopy_send_discard_bm_ram(ms, block);
        postcopy_discard_send_finish(ms);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
 * inverted.
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix.  This function canonicalizes the bitmaps.
 *
 * @ms: current migration state
 * @block: block that contains the page we want to canonicalize
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    /* Find a dirty page */
    run_start = find_next_bit(bitmap, pages, 0);

    while (run_start < pages) {

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
            /* Find the end of this run */
            run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
        }

        if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
            unsigned long page;
            unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
                                                             host_ratio);
            run_start = QEMU_ALIGN_UP(run_start, host_ratio);

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        /* Find the next dirty page for the next iteration */
        run_start = find_next_bit(bitmap, pages, run_start);
    }
}
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
/**
 * postcopy_chunk_hostpages: discard any partially sent host page
 *
 * Utility for the outgoing postcopy code.
 *
 * Discard any partially sent host-page size chunks, mark any partially
 * dirty host-page size chunks as all dirty.  In this case the host-page
 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
 *
 * Returns zero on success
 *
 * @ms: current migration state
 * @block: block we want to work with
 */
static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
{
    postcopy_discard_send_init(ms, block->idstr);

    /*
     * Ensure that all partially dirty host pages are made fully dirty.
     */
    postcopy_chunk_hostpages_pass(ms, block);

    postcopy_discard_send_finish(ms);
    return 0;
}
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Returns zero on success
 *
 * Transmit the set of pages to be discarded after precopy to the target
 * these are pages that:
 *     a) Have been previously transmitted but are now dirty again
 *     b) Pages that have never been transmitted, this ensures that
 *        any pages on the destination that have been mapped by background
 *        tasks get discarded (transparent huge pages is the specific concern)
 * Hopefully this is pretty sparse
 *
 * @ms: current migration state
 */
int ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    RAMState *rs = ram_state;
    RAMBlock *block;
    int ret;

    RCU_READ_LOCK_GUARD();

    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Deal with TPS != HPS and huge pages */
        ret = postcopy_chunk_hostpages(ms, block);
        if (ret) {
            return ret;
        }

#ifdef DEBUG_POSTCOPY
        ram_debug_dump_bitmap(block->bmap, true,
                              block->used_length >> TARGET_PAGE_BITS);
#endif
    }
    trace_ram_postcopy_send_discard_bitmap();

    return postcopy_each_ram_send_discard(ms);
}
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
/**
 * ram_discard_range: discard dirtied pages at the beginning of postcopy
 *
 * Returns zero on success
 *
 * @rbname: name of the RAMBlock of the request. NULL means the
 *          same that last one.
 * @start: RAMBlock starting page
 * @length: RAMBlock size
 */
int ram_discard_range(const char *rbname, uint64_t start, size_t length)
{
    trace_ram_discard_range(rbname, start, length);

    RCU_READ_LOCK_GUARD();
    RAMBlock *rb = qemu_ram_block_by_name(rbname);

    if (!rb) {
        error_report("ram_discard_range: Failed to find block '%s'", rbname);
        return -1;
    }

    /*
     * On source VM, we don't need to update the received bitmap since
     * we don't even have one.
     */
    if (rb->receivedmap) {
        bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
                     length >> qemu_target_page_bits());
    }

    return ram_block_discard_range(rb, start, length);
}
2551
2552
2553
2554
2555
/*
 * For every allocation, we will try not to crash the VM if the
 * allocation failed.
 *
 * Returns 0 on success, -ENOMEM if any allocation fails (with all
 * previously allocated resources released via the goto ladder).
 */
static int xbzrle_init(void)
{
    Error *local_err = NULL;

    if (!migrate_use_xbzrle()) {
        return 0;
    }

    XBZRLE_cache_lock();

    XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.zero_target_page) {
        error_report("%s: Error allocating zero page", __func__);
        goto err_out;
    }

    XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
                              TARGET_PAGE_SIZE, &local_err);
    if (!XBZRLE.cache) {
        error_report_err(local_err);
        goto free_zero_page;
    }

    XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.encoded_buf) {
        error_report("%s: Error allocating encoded_buf", __func__);
        goto free_cache;
    }

    XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
    if (!XBZRLE.current_buf) {
        error_report("%s: Error allocating current_buf", __func__);
        goto free_encoded_buf;
    }

    /* We are all good */
    XBZRLE_cache_unlock();
    return 0;

free_encoded_buf:
    g_free(XBZRLE.encoded_buf);
    XBZRLE.encoded_buf = NULL;
free_cache:
    cache_fini(XBZRLE.cache);
    XBZRLE.cache = NULL;
free_zero_page:
    g_free(XBZRLE.zero_target_page);
    XBZRLE.zero_target_page = NULL;
err_out:
    XBZRLE_cache_unlock();
    return -ENOMEM;
}
2608
/* Allocate and initialize a fresh RAMState; returns 0 or -1 on OOM. */
static int ram_state_init(RAMState **rsp)
{
    *rsp = g_try_new0(RAMState, 1);

    if (!*rsp) {
        error_report("%s: Init ramstate fail", __func__);
        return -1;
    }

    qemu_mutex_init(&(*rsp)->bitmap_mutex);
    qemu_mutex_init(&(*rsp)->src_page_req_mutex);
    QSIMPLEQ_INIT(&(*rsp)->src_page_requests);

    /*
     * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
     * This must match with the initial values of dirty bitmap.
     */
    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
    ram_state_reset(*rsp);

    return 0;
}
2632
/* Allocate dirty and clear bitmaps for all migratable blocks. */
static void ram_list_init_bitmaps(void)
{
    MigrationState *ms = migrate_get_current();
    RAMBlock *block;
    unsigned long pages;
    uint8_t shift;

    /* Skip setting bitmap if there is no RAM */
    if (ram_bytes_total()) {
        shift = ms->clear_bitmap_shift;
        /* Clamp the shift into its documented valid range. */
        if (shift > CLEAR_BITMAP_SHIFT_MAX) {
            error_report("clear_bitmap_shift (%u) too big, using "
                         "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
            shift = CLEAR_BITMAP_SHIFT_MAX;
        } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
            error_report("clear_bitmap_shift (%u) too small, using "
                         "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
            shift = CLEAR_BITMAP_SHIFT_MIN;
        }

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            pages = block->max_length >> TARGET_PAGE_BITS;
            /*
             * The initial dirty bitmap for migration must be set with all
             * ones to make sure we'll migrate every guest RAM page to
             * destination.
             * Here we set RAMBlock.bmap all to 1 because when rebegin a
             * new migration after a failed migration, ram_list.
             * dirty_memory[DIRTY_MEMORY_MIGRATION] don't include the whole
             * guest memory.
             */
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
            block->clear_bmap_shift = shift;
            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
        }
    }
}
2671
/* Set up dirty bitmaps and start dirty logging (unless background snapshot). */
static void ram_init_bitmaps(RAMState *rs)
{
    /* For memory_global_dirty_log_start below.  */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    WITH_RCU_READ_LOCK_GUARD() {
        ram_list_init_bitmaps();
        /* We don't use dirty log with background snapshots */
        if (!migrate_background_snapshot()) {
            memory_global_dirty_log_start();
            migration_bitmap_sync_precopy(rs);
        }
    }
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
2689
/*
 * Initialize everything needed for RAM migration: state, XBZRLE cache
 * (if enabled) and dirty bitmaps.  Returns 0 on success, -1 on error
 * (in which case any partially-initialized state is cleaned up).
 */
static int ram_init_all(RAMState **rsp)
{
    if (ram_state_init(rsp)) {
        return -1;
    }

    if (xbzrle_init()) {
        ram_state_cleanup(rsp);
        return -1;
    }

    ram_init_bitmaps(*rsp);

    return 0;
}
2705
/* Prepare RAM state for a postcopy-recovery resume on stream @out. */
static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
{
    RAMBlock *block;
    uint64_t pages = 0;

    /*
     * Postcopy is not using xbzrle/compression, so no need for that.
     * Also, since source are already halted, we don't need to care
     * about dirty page logging as well.
     */

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        pages += bitmap_count_one(block->bmap,
                                  block->used_length >> TARGET_PAGE_BITS);
    }

    /* This may not be aligned with current bitmaps. Recalculate. */
    rs->migration_dirty_pages = pages;

    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    /*
     * Disable the bulk stage, otherwise we'll resend the whole RAM no
     * matter what we have sent before.
     */
    rs->ram_bulk_stage = false;

    /* Update RAMState cache of output QEMUFile */
    rs->f = out;

    trace_ram_state_resume_prepare(pages);
}
2740
2741
2742
2743
2744
2745
2746
/*
 * This function clears bits of the free pages reported by the caller from the
 * migration dirty bitmap. @addr is the host address corresponding to the
 * start of the continuous guest free pages, and @len is the total bytes of
 * those pages.
 */
void qemu_guest_free_page_hint(void *addr, size_t len)
{
    RAMBlock *block;
    ram_addr_t offset;
    size_t used_len, start, npages;
    MigrationState *s = migrate_get_current();

    /* This function is currently expected to be used during live migration */
    if (!migration_is_setup_or_active(s->state)) {
        return;
    }

    for (; len > 0; len -= used_len, addr += used_len) {
        block = qemu_ram_block_from_host(addr, false, &offset);
        if (unlikely(!block || offset >= block->used_length)) {
            /*
             * The implementation might not support RAMBlock resize during
             * live migration, but it could happen in theory with future
             * updates. So we add a check here to capture that case.
             */
            error_report_once("%s unexpected error", __func__);
            return;
        }

        /* The hinted range may span multiple blocks; clamp to this block. */
        if (len <= block->used_length - offset) {
            used_len = len;
        } else {
            used_len = block->used_length - offset;
        }

        start = offset >> TARGET_PAGE_BITS;
        npages = used_len >> TARGET_PAGE_BITS;

        qemu_mutex_lock(&ram_state->bitmap_mutex);
        ram_state->migration_dirty_pages -=
                      bitmap_count_one_with_offset(block->bmap, start, npages);
        bitmap_clear(block->bmap, start, npages);
        qemu_mutex_unlock(&ram_state->bitmap_mutex);
    }
}
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */

/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    if (compress_threads_save_setup()) {
        return -1;
    }

    /* migration has already setup the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    WITH_RCU_READ_LOCK_GUARD() {
        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);

        /* Announce every migratable block: name, length, optional extras. */
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            qemu_put_byte(f, strlen(block->idstr));
            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
            qemu_put_be64(f, block->used_length);
            if (migrate_postcopy_ram() && block->page_size !=
                                          qemu_host_page_size) {
                qemu_put_be64(f, block->page_size);
            }
            if (migrate_ignore_shared()) {
                qemu_put_be64(f, block->mr->addr);
            }
        }
    }

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    multifd_send_sync_main(f);
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);

    return 0;
}
2847
2848
2849
2850
2851
2852
2853
2854
2855
/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /*
         * Avoid transferring ram during bulk phase of block migration as
         * the bulk phase will usually take a long time and transferring
         * ram updates during that time is pointless.
         */
        goto out;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        if (ram_list.version != rs->last_version) {
            ram_state_reset(rs);
        }

        /* Read version before ram_list.blocks */
        smp_rmb();

        ram_control_before_iterate(f, RAM_CONTROL_ROUND);

        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        i = 0;
        while ((ret = qemu_file_rate_limit(f)) == 0 ||
               !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
            int pages;

            if (qemu_file_get_error(f)) {
                break;
            }

            pages = ram_find_and_save_block(rs, false);
            /* no more pages to sent */
            if (pages == 0) {
                done = 1;
                break;
            }

            if (pages < 0) {
                qemu_file_set_error(f, pages);
                break;
            }

            rs->target_page_count += pages;

            /*
             * During postcopy, it is necessary to make sure one whole host
             * page is sent in one chunk.
             */
            if (migrate_postcopy_ram()) {
                flush_compressed_data(rs);
            }

            /*
             * we want to check in the 1st loop, just in case it was the 1st
             * time and we had to sync the dirty bitmap.
             * qemu_clock_get_ns() is a bit expensive, so we only check each
             * some iterations
             */
            if ((i & 63) == 0) {
                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
                              1000000;
                if (t1 > MAX_WAIT) {
                    trace_ram_save_iterate_big_wait(t1, i);
                    break;
                }
            }
            i++;
        }
    }

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    if (ret >= 0
        && migration_is_setup_or_active(migrate_get_current()->state)) {
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
        ram_counters.transferred += 8;

        ret = qemu_file_get_error(f);
    }
    if (ret < 0) {
        return ret;
    }

    return done;
}
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success or negative on error
 *
 * Called with iothread lock
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;

    WITH_RCU_READ_LOCK_GUARD() {
        if (!migration_in_postcopy()) {
            migration_bitmap_sync_precopy(rs);
        }

        ram_control_before_iterate(f, RAM_CONTROL_FINISH);

        /* try transferring iterative blocks of memory */

        /* flush all remaining blocks regardless of rate limiting */
        while (true) {
            int pages;

            pages = ram_find_and_save_block(rs, !migration_in_colo_state());
            /* no more blocks to sent */
            if (pages == 0) {
                break;
            }
            if (pages < 0) {
                ret = pages;
                break;
            }
        }

        flush_compressed_data(rs);
        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    }

    if (ret >= 0) {
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
    }

    return ret;
}
3007
/*
 * ram_save_pending: estimate how much RAM remains to be sent, split
 * into precopy-only / compatible / postcopy-only amounts.  If the
 * estimate is below @max_size, resync the dirty bitmap first for a
 * more accurate figure (needs the iothread lock for the sync).
 */
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        WITH_RCU_READ_LOCK_GUARD() {
            migration_bitmap_sync_precopy(rs);
        }
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
        /* We can do postcopy, and all the data is postcopiable */
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}
3036
/*
 * load_xbzrle: decode one XBZRLE-compressed page from the stream into
 * @host.  Returns 0 on success, -1 on a malformed or undecodable page.
 */
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    loaded_data = XBZRLE.decoded_buf;

    /* load data and decode */
    /* it can change loaded_data to point to an internal buffer */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    /* Sticky across calls: CONTINUE pages reuse the last named block. */
    static RAMBlock *block;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    if (ramblock_is_ignored(block)) {
        error_report("block %s should not be migrated !", id);
        return NULL;
    }

    return block;
}
3112
3113static inline void *host_from_ram_block_offset(RAMBlock *block,
3114 ram_addr_t offset)
3115{
3116 if (!offset_in_ramblock(block, offset)) {
3117 return NULL;
3118 }
3119
3120 return block->host + offset;
3121}
3122
/*
 * colo_cache_from_block_offset: map a block-relative offset into the
 * block's COLO cache, optionally recording the page in the dirty bitmap.
 * Returns NULL when the offset is out of range or the cache is missing.
 */
static inline void *colo_cache_from_block_offset(RAMBlock *block,
                             ram_addr_t offset, bool record_bitmap)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }
    if (!block->colo_cache) {
        error_report("%s: colo_cache is NULL in block :%s",
                     __func__, block->idstr);
        return NULL;
    }

    /*
    * During colo checkpoint, we need bitmap of these migrated pages.
    * It help us to decide which pages in ram cache should be flushed
    * into VM's RAM later.
    */
    if (record_bitmap &&
        !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
        ram_state->migration_dirty_pages++;
    }
    return block->colo_cache + offset;
}
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Already all zeroes: skip the memset to avoid dirtying the page. */
        return;
    }
    memset(host, ch, size);
}
3163
3164
/*
 * qemu_uncompress_data: inflate @source_len compressed bytes from @source
 * into @dest (capacity @dest_len) using the caller-provided zlib stream.
 *
 * Returns the number of bytes produced, or -1 on any zlib failure
 * (including incomplete output, which does not reach Z_STREAM_END).
 */
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    if (inflateReset(stream) != Z_OK) {
        return -1;
    }

    stream->next_in = (uint8_t *)source;
    stream->avail_in = source_len;
    stream->next_out = dest;
    stream->avail_out = dest_len;

    if (inflate(stream, Z_NO_FLUSH) != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}
3188
3189static void *do_data_decompress(void *opaque)
3190{
3191 DecompressParam *param = opaque;
3192 unsigned long pagesize;
3193 uint8_t *des;
3194 int len, ret;
3195
3196 qemu_mutex_lock(¶m->mutex);
3197 while (!param->quit) {
3198 if (param->des) {
3199 des = param->des;
3200 len = param->len;
3201 param->des = 0;
3202 qemu_mutex_unlock(¶m->mutex);
3203
3204 pagesize = TARGET_PAGE_SIZE;
3205
3206 ret = qemu_uncompress_data(¶m->stream, des, pagesize,
3207 param->compbuf, len);
3208 if (ret < 0 && migrate_get_current()->decompress_error_check) {
3209 error_report("decompress data failed");
3210 qemu_file_set_error(decomp_file, ret);
3211 }
3212
3213 qemu_mutex_lock(&decomp_done_lock);
3214 param->done = true;
3215 qemu_cond_signal(&decomp_done_cond);
3216 qemu_mutex_unlock(&decomp_done_lock);
3217
3218 qemu_mutex_lock(¶m->mutex);
3219 } else {
3220 qemu_cond_wait(¶m->cond, ¶m->mutex);
3221 }
3222 }
3223 qemu_mutex_unlock(¶m->mutex);
3224
3225 return NULL;
3226}
3227
3228static int wait_for_decompress_done(void)
3229{
3230 int idx, thread_count;
3231
3232 if (!migrate_use_compression()) {
3233 return 0;
3234 }
3235
3236 thread_count = migrate_decompress_threads();
3237 qemu_mutex_lock(&decomp_done_lock);
3238 for (idx = 0; idx < thread_count; idx++) {
3239 while (!decomp_param[idx].done) {
3240 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3241 }
3242 }
3243 qemu_mutex_unlock(&decomp_done_lock);
3244 return qemu_file_get_error(decomp_file);
3245}
3246
/*
 * Stop every decompression worker and free all per-thread resources.
 *
 * Safe to call on a partially-initialized array: also used as the error
 * path of compress_threads_load_setup().  First pass asks all started
 * workers to quit (so they can wind down concurrently), second pass
 * joins them and tears down their state.
 */
static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * compbuf doubles as an "is this slot initialized?" indicator:
         * setup allocates it before starting the thread, so a NULL
         * compbuf means this slot (and all later ones) never started.
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}
3287
3288static int compress_threads_load_setup(QEMUFile *f)
3289{
3290 int i, thread_count;
3291
3292 if (!migrate_use_compression()) {
3293 return 0;
3294 }
3295
3296 thread_count = migrate_decompress_threads();
3297 decompress_threads = g_new0(QemuThread, thread_count);
3298 decomp_param = g_new0(DecompressParam, thread_count);
3299 qemu_mutex_init(&decomp_done_lock);
3300 qemu_cond_init(&decomp_done_cond);
3301 decomp_file = f;
3302 for (i = 0; i < thread_count; i++) {
3303 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3304 goto exit;
3305 }
3306
3307 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3308 qemu_mutex_init(&decomp_param[i].mutex);
3309 qemu_cond_init(&decomp_param[i].cond);
3310 decomp_param[i].done = true;
3311 decomp_param[i].quit = false;
3312 qemu_thread_create(decompress_threads + i, "decompress",
3313 do_data_decompress, decomp_param + i,
3314 QEMU_THREAD_JOINABLE);
3315 }
3316 return 0;
3317exit:
3318 compress_threads_load_cleanup();
3319 return -1;
3320}
3321
/*
 * Hand one compressed page (@len bytes read from @f, destination @host)
 * to an idle decompression worker.
 *
 * Blocks on decomp_done_cond until some worker is free.  The 'done'
 * flags are protected by decomp_done_lock (held for the whole scan via
 * the lock guard); each worker's queue fields are protected by its own
 * per-param mutex.
 */
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    QEMU_LOCK_GUARD(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            /* Work was queued on worker idx. */
            break;
        } else {
            /* All workers busy: wait for one to signal completion. */
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
}
3349
3350
3351
3352
3353
3354
3355
/*
 * Initialise the global ram_state for the COLO incoming side.
 * The bulk stage optimisation does not apply when replaying pages
 * into the COLO cache, so it is switched off immediately.
 */
static void colo_init_ram_state(void)
{
    ram_state_init(&ram_state);
    ram_state->ram_bulk_stage = false;
}
3361
3362
3363
3364
3365
3366
3367int colo_init_ram_cache(void)
3368{
3369 RAMBlock *block;
3370
3371 WITH_RCU_READ_LOCK_GUARD() {
3372 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3373 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3374 NULL,
3375 false);
3376 if (!block->colo_cache) {
3377 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3378 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3379 block->used_length);
3380 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3381 if (block->colo_cache) {
3382 qemu_anon_ram_free(block->colo_cache, block->used_length);
3383 block->colo_cache = NULL;
3384 }
3385 }
3386 return -errno;
3387 }
3388 }
3389 }
3390
3391
3392
3393
3394
3395
3396 if (ram_bytes_total()) {
3397 RAMBlock *block;
3398
3399 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3400 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3401 block->bmap = bitmap_new(pages);
3402 }
3403 }
3404
3405 colo_init_ram_state();
3406 return 0;
3407}
3408
3409
/*
 * Start dirty logging for the COLO incoming side.  Must be called with
 * the iothread lock available (it is taken here together with the
 * ramlist lock, in that order).
 */
void colo_incoming_start_dirty_log(void)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    /* Discard the stale dirty state accumulated before this point. */
    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
            /* Drop everything just pulled in: we start from a clean slate. */
            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
        }
        memory_global_dirty_log_start();
    }
    ram_state->migration_dirty_pages = 0;
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
3430
3431
/*
 * Tear down everything colo_init_ram_cache() set up: stop dirty
 * logging, free the per-block bitmaps and COLO caches, and release
 * ram_state.  The caller must hold the global lock.
 */
void colo_release_ram_cache(void)
{
    RAMBlock *block;

    memory_global_dirty_log_stop();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->bmap);
        block->bmap = NULL;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            if (block->colo_cache) {
                qemu_anon_ram_free(block->colo_cache, block->used_length);
                block->colo_cache = NULL;
            }
        }
    }
    ram_state_cleanup(&ram_state);
}
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461static int ram_load_setup(QEMUFile *f, void *opaque)
3462{
3463 if (compress_threads_load_setup(f)) {
3464 return -1;
3465 }
3466
3467 xbzrle_load_setup();
3468 ramblock_recv_map_init();
3469
3470 return 0;
3471}
3472
3473static int ram_load_cleanup(void *opaque)
3474{
3475 RAMBlock *rb;
3476
3477 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3478 qemu_ram_block_writeback(rb);
3479 }
3480
3481 xbzrle_load_cleanup();
3482 compress_threads_load_cleanup();
3483
3484 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3485 g_free(rb->receivedmap);
3486 rb->receivedmap = NULL;
3487 }
3488
3489 return 0;
3490}
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
/*
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Thin wrapper: the actual work is done by postcopy_ram_incoming_init().
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    return postcopy_ram_incoming_init(mis);
}
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
/*
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile to read the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matches_target_page_size = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' atomically */
    void *postcopy_host_page = mis->postcopy_tmp_page;
    void *this_host = NULL;
    bool all_zero = true;
    int target_pages = 0;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;
        int len;

        addr = qemu_get_be64(f);

        /*
         * If qemu file error, we should stop here, and then "addr"
         * may be invalid.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            target_pages++;
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in one chunk.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            if (target_pages == 1) {
                /* First target page of the host page: remember its base. */
                this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
                                                    block->page_size);
            } else {
                /* Not the 1st target page within the host page */
                if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
                    (uintptr_t)this_host) {
                    error_report("Non-same host page %p/%p",
                                 host, this_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page.
             */
            if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
                place_needed = true;
            }
            place_source = postcopy_host_page;
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            /*
             * Can skip writing page_buffer when this is a zero page and
             * the block's page size matches the target page size (the
             * page will be placed with postcopy_place_page_zero()).
             */
            if (ch || !matches_target_page_size) {
                memset(page_buffer, ch, TARGET_PAGE_SIZE);
            }
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!matches_target_page_size) {
                /* For huge pages, we always use the temporary buffer */
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * For small pages that match the target page size, we
                 * avoid the qemu_file copy.  Instead we directly use
                 * the buffer of QEMUFile to place the page.  Note: we
                 * cannot do any QEMUFile operation before using that
                 * buffer, to make sure the buffer is valid when
                 * placing the page.
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            all_zero = false;
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, page_buffer, len);
            break;

        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            multifd_recv_sync_main();
            break;
        default:
            error_report("Unknown combination of migration flags: 0x%x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

        /* Got the whole host page, wait for decompress before placing. */
        if (place_needed) {
            ret |= wait_for_decompress_done();
        }

        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
                                                       block->page_size);

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
            place_needed = false;
            target_pages = 0;
            /* Assume we have a zero page until we detect something different */
            all_zero = true;
        }
    }

    return ret;
}
3688
3689static bool postcopy_is_advised(void)
3690{
3691 PostcopyState ps = postcopy_state_get();
3692 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3693}
3694
3695static bool postcopy_is_running(void)
3696{
3697 PostcopyState ps = postcopy_state_get();
3698 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3699}
3700
3701
3702
3703
3704
/*
 * Flush content of the RAM cache into the SVM's memory.
 * Only flush the pages that are marked dirty in the cache's bitmap.
 */
void colo_flush_ram_cache(void)
{
    RAMBlock *block = NULL;
    void *dst_host;
    void *src_host;
    unsigned long offset = 0;

    /* Pull the latest dirty state into each block's bitmap first. */
    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
        }
    }

    trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
    WITH_RCU_READ_LOCK_GUARD() {
        block = QLIST_FIRST_RCU(&ram_list.blocks);

        while (block) {
            offset = migration_bitmap_find_dirty(ram_state, block, offset);

            if (((ram_addr_t)offset) << TARGET_PAGE_BITS
                >= block->used_length) {
                /* No more dirty pages in this block: advance to the next. */
                offset = 0;
                block = QLIST_NEXT_RCU(block, next);
            } else {
                /* Copy one dirty page from the cache into guest RAM. */
                migration_bitmap_clear_dirty(ram_state, block, offset);
                dst_host = block->host
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                src_host = block->colo_cache
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
            }
        }
    }
    trace_colo_flush_ram_cache_end();
}
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
/*
 * ram_load_precopy: load pages in precopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in precopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile to read the data from
 */
static int ram_load_precopy(QEMUFile *f)
{
    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();
    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL, *host_bak = NULL;
        uint8_t ch;

        /*
         * Yield periodically to let main loop run, but an iteration of
         * the main loop is expensive, so do it each some iterations.
         */
        if ((i & 32767) == 0 && qemu_in_coroutine()) {
            aio_co_schedule(qemu_get_current_aio_context(),
                            qemu_coroutine_self());
            qemu_coroutine_yield();
        }
        i++;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            /*
             * After going into COLO stage, we should not load the page
             * into SVM's memory directly, we put them into colo_cache firstly.
             * NOTE: We need to keep a copy of SVM's ram in colo_cache.
             * Previously, we copied all these memory in preparing stage of COLO
             * while we need to stop VM, which is a time-consuming process.
             * Here we optimize it by a trick, back-up every page while in
             * migration process while COLO is enabled, though it affects the
             * speed of the migration, but it obviously reduce the downtime of
             * back-up all SVM's memory in COLO preparing stage.
             */
            if (migration_incoming_colo_enabled()) {
                if (migration_incoming_in_colo_state()) {
                    /* In COLO stage, put all pages into cache temporarily */
                    host = colo_cache_from_block_offset(block, addr, true);
                } else {
                   /*
                    * In migration stage but before COLO stage,
                    * Put all pages into both cache and SVM's memory.
                    */
                    host_bak = colo_cache_from_block_offset(block, addr, false);
                }
            }
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (!migration_incoming_in_colo_state()) {
                ramblock_recv_bitmap_set(block, host);
            }

            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated !", id);
                    ret = -EINVAL;
                } else if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised && migrate_postcopy_ram() &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    if (migrate_ignore_shared()) {
                        hwaddr addr = qemu_get_be64(f);
                        if (ramblock_is_ignored(block) &&
                            block->mr->addr != addr) {
                            error_report("Mismatched GPAs for block %s "
                                         "%" PRId64 "!= %" PRId64,
                                         id, (uint64_t)addr,
                                         (uint64_t)block->mr->addr);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            multifd_recv_sync_main();
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: 0x%x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
        if (!ret && host_bak) {
            /* COLO pre-stage: mirror the page into the cache as well. */
            memcpy(host_bak, host, TARGET_PAGE_SIZE);
        }
    }

    ret |= wait_for_decompress_done();
    return ret;
}
3945
3946static int ram_load(QEMUFile *f, void *opaque, int version_id)
3947{
3948 int ret = 0;
3949 static uint64_t seq_iter;
3950
3951
3952
3953
3954 bool postcopy_running = postcopy_is_running();
3955
3956 seq_iter++;
3957
3958 if (version_id != 4) {
3959 return -EINVAL;
3960 }
3961
3962
3963
3964
3965
3966
3967
3968 WITH_RCU_READ_LOCK_GUARD() {
3969 if (postcopy_running) {
3970 ret = ram_load_postcopy(f);
3971 } else {
3972 ret = ram_load_precopy(f);
3973 }
3974 }
3975 trace_ram_load_complete(ret, seq_iter);
3976
3977 return ret;
3978}
3979
3980static bool ram_has_postcopy(void *opaque)
3981{
3982 RAMBlock *rb;
3983 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3984 if (ramblock_is_pmem(rb)) {
3985 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
3986 "is not supported now!", rb->idstr, rb->host);
3987 return false;
3988 }
3989 }
3990
3991 return migrate_postcopy_ram();
3992}
3993
3994
3995static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3996{
3997 RAMBlock *block;
3998 QEMUFile *file = s->to_dst_file;
3999 int ramblock_count = 0;
4000
4001 trace_ram_dirty_bitmap_sync_start();
4002
4003 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4004 qemu_savevm_send_recv_bitmap(file, block->idstr);
4005 trace_ram_dirty_bitmap_request(block->idstr);
4006 ramblock_count++;
4007 }
4008
4009 trace_ram_dirty_bitmap_sync_wait();
4010
4011
4012 while (ramblock_count--) {
4013 qemu_sem_wait(&s->rp_state.rp_sem);
4014 }
4015
4016 trace_ram_dirty_bitmap_sync_complete();
4017
4018 return 0;
4019}
4020
/* Wake ram_dirty_bitmap_sync_all(): one more block's bitmap arrived. */
static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}
4025
4026
4027
4028
4029
4030
/*
 * Read the received bitmap of @block from the return path @file and
 * fold it into the block's dirty bitmap; used during postcopy recovery
 * so only still-missing pages are resent.
 *
 * Returns 0 on success, negative errno on any size/format/IO mismatch.
 */
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
{
    int ret = -EINVAL;
    QEMUFile *file = s->rp_state.from_dst_file;
    unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
    uint64_t local_size = DIV_ROUND_UP(nbits, 8);
    uint64_t size, end_mark;

    trace_ram_dirty_bitmap_reload_begin(block->idstr);

    if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: incorrect state %s", __func__,
                     MigrationStatus_str(s->state));
        return -EINVAL;
    }

    /*
     * The sender transfers the bitmap in chunks of 64-bit words, so the
     * on-the-wire size is the bitmap size rounded up to 8 bytes.
     */
    local_size = ROUND_UP(local_size, 8);

    /* Over-allocate one long of slack for the unaligned tail. */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    size = qemu_get_be64(file);

    /* The size of the bitmap should match our computed value exactly. */
    if (size != local_size) {
        error_report("%s: ramblock '%s' bitmap size mismatch "
                     "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
                     block->idstr, size, local_size);
        ret = -EINVAL;
        goto out;
    }

    size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
    end_mark = qemu_get_be64(file);

    ret = qemu_file_get_error(file);
    if (ret || size != local_size) {
        error_report("%s: read bitmap failed for ramblock '%s': %d"
                     " (size 0x%"PRIx64", got: 0x%"PRIx64")",
                     __func__, block->idstr, ret, local_size, size);
        ret = -EIO;
        goto out;
    }

    if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                     __func__, block->idstr, end_mark);
        ret = -EINVAL;
        goto out;
    }

    /*
     * The wire format is little endian; convert to the host's native
     * long layout before use.
     */
    bitmap_from_le(block->bmap, le_bitmap, nbits);

    /*
     * What we received is "received bitmap". Invert it to obtain the
     * "dirty bitmap" of pages that still need to be sent again.
     */
    bitmap_complement(block->bmap, block->bmap, nbits);

    trace_ram_dirty_bitmap_reload_complete(block->idstr);

    /*
     * We succeeded to sync bitmap for this ramblock; let the waiter in
     * ram_dirty_bitmap_sync_all() make progress.
     */
    ram_dirty_bitmap_reload_notify(s);

    ret = 0;
out:
    g_free(le_bitmap);
    return ret;
}
4111
4112static int ram_resume_prepare(MigrationState *s, void *opaque)
4113{
4114 RAMState *rs = *(RAMState **)opaque;
4115 int ret;
4116
4117 ret = ram_dirty_bitmap_sync_all(s, rs);
4118 if (ret) {
4119 return ret;
4120 }
4121
4122 ram_state_resume_prepare(rs, s->to_dst_file);
4123
4124 return 0;
4125}
4126
/* VMState save/load handlers for the "ram" live section. */
static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
    .resume_prepare = ram_resume_prepare,
};
4140
/* Register the "ram" live-migration section (stream version 4). */
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
4146