1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include "qemu/osdep.h"
30#include "qemu/cutils.h"
31#include "qemu/bitops.h"
32#include "qemu/bitmap.h"
33#include "qemu/main-loop.h"
34#include "xbzrle.h"
35#include "ram.h"
36#include "migration.h"
37#include "migration/register.h"
38#include "migration/misc.h"
39#include "qemu-file.h"
40#include "postcopy-ram.h"
41#include "page_cache.h"
42#include "qemu/error-report.h"
43#include "qapi/error.h"
44#include "qapi/qapi-types-migration.h"
45#include "qapi/qapi-events-migration.h"
46#include "qapi/qmp/qerror.h"
47#include "trace.h"
48#include "exec/ram_addr.h"
49#include "exec/target_page.h"
50#include "qemu/rcu_queue.h"
51#include "migration/colo.h"
52#include "block.h"
53#include "sysemu/cpu-throttle.h"
54#include "savevm.h"
55#include "qemu/iov.h"
56#include "multifd.h"
57#include "sysemu/runstate.h"
58
59#include "hw/boards.h"
60
61#if defined(__linux__)
62#include "qemu/userfaultfd.h"
63#endif
64
65
66
67
68
69
70
71
72
73
74#define RAM_SAVE_FLAG_FULL 0x01
75#define RAM_SAVE_FLAG_ZERO 0x02
76#define RAM_SAVE_FLAG_MEM_SIZE 0x04
77#define RAM_SAVE_FLAG_PAGE 0x08
78#define RAM_SAVE_FLAG_EOS 0x10
79#define RAM_SAVE_FLAG_CONTINUE 0x20
80#define RAM_SAVE_FLAG_XBZRLE 0x40
81
82#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
83
/* Whether the given range of bytes is entirely zero (thin wrapper over
 * util's optimized buffer_is_zero()). */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
88
/* Global XBZRLE statistics, exposed via query-migrate. */
XBZRLECacheStats xbzrle_counters;

/*
 * State used by the XBZRLE page-delta compressor.  All fields except
 * decoded_buf (used only on the destination side) are protected by `lock`
 * via XBZRLE_cache_lock()/XBZRLE_cache_unlock().
 */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;
106
107static void XBZRLE_cache_lock(void)
108{
109 if (migrate_use_xbzrle()) {
110 qemu_mutex_lock(&XBZRLE.lock);
111 }
112}
113
114static void XBZRLE_cache_unlock(void)
115{
116 if (migrate_use_xbzrle()) {
117 qemu_mutex_unlock(&XBZRLE.lock);
118 }
119}
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134int xbzrle_cache_resize(uint64_t new_size, Error **errp)
135{
136 PageCache *new_cache;
137 int64_t ret = 0;
138
139
140 if (new_size != (size_t)new_size) {
141 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
142 "exceeding address space");
143 return -1;
144 }
145
146 if (new_size == migrate_xbzrle_cache_size()) {
147
148 return 0;
149 }
150
151 XBZRLE_cache_lock();
152
153 if (XBZRLE.cache != NULL) {
154 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
155 if (!new_cache) {
156 ret = -1;
157 goto out;
158 }
159
160 cache_fini(XBZRLE.cache);
161 XBZRLE.cache = new_cache;
162 }
163out:
164 XBZRLE_cache_unlock();
165 return ret;
166}
167
168bool ramblock_is_ignored(RAMBlock *block)
169{
170 return !qemu_ram_is_migratable(block) ||
171 (migrate_ignore_shared() && qemu_ram_is_shared(block));
172}
173
174#undef RAMBLOCK_FOREACH
175
/*
 * Invoke @func on every non-ignored RAMBlock, stopping at the first block
 * for which @func returns non-zero.  Iteration happens under the RCU read
 * lock.  Returns the last value returned by @func (0 if all blocks were
 * visited).
 */
int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}
191
/*
 * Allocate the "received pages" bitmap for every non-ignored RAMBlock,
 * one bit per target page of the block's max_length.  Must be called only
 * once: asserts that no block already has a receivedmap.
 */
static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}
201
/* Test whether the page containing @host_addr has been marked received. */
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}
207
/* Same as above, but taking a byte offset into the block instead of a
 * host address. */
bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}
212
/* Atomically mark the page containing @host_addr as received. */
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}
217
/* Atomically mark @nr consecutive pages starting at @host_addr as
 * received. */
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}
225
226#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
227
228
229
230
231
232
/**
 * ramblock_recv_bitmap_send: Send the latest receivedmap of the named
 * RAMBlock to @file (wire format: be64 size, little-endian bitmap bytes,
 * be64 ending magic).
 *
 * Returns >0 as the number of bytes sent, or negative value on error.
 *
 * @file: where to send the data
 * @block_name: which RAMBlock the bitmap belongs to
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

    /*
     * Over-allocate by one extra long so bitmap_to_le() can always operate
     * on whole longs even when nbits is not a multiple of BITS_PER_LONG.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always convert to little endian so the stream format is
     * host-endianness independent.
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap payload, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * Round up to a multiple of 8 bytes (64 bits) so the receiver can
     * consume the data in whole longs regardless of host word size.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);

    /*
     * Trailing magic number; lets the receiver sanity-check that the whole
     * bitmap arrived intact.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    /* payload plus the leading size field */
    return size + sizeof(size);
}
289
290
291
292
293
/*
 * An outstanding page request (e.g. from postcopy on the destination),
 * queued on RAMState.src_page_requests.
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;   /* offset of the request within @rb */
    hwaddr    len;      /* remaining length of the request, in bytes */

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
301
302
/* State of RAM for migration (source side) */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* UFFD file descriptor, used in 'write-tracking' migration */
    int uffdio_fd;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;
    /* Start using XBZRLE (e.g., after the first round). */
    bool xbzrle_enabled;

    /* compression statistics since the beginning of the period */
    /* amount of count that no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;
361
/* Initialize the precopy notifier list; called once at startup. */
void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}
366
/* Register @n to be notified of precopy migration events. */
void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}
371
/* Unregister a previously added precopy notifier. */
void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}
376
377int precopy_notify(PrecopyNotifyReason reason, Error **errp)
378{
379 PrecopyNotifyData pnd;
380 pnd.reason = reason;
381 pnd.errp = errp;
382
383 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
384}
385
386uint64_t ram_bytes_remaining(void)
387{
388 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
389 0;
390}
391
/* Global RAM migration statistics, exposed via query-migrate. */
MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
404
/* Global compression statistics, exposed via query-migrate. */
CompressionStats compression_counters;

/* Per-thread state for a RAM compression worker. */
struct CompressParam {
    bool done;          /* thread finished the current page (comp_done_lock) */
    bool quit;          /* ask the thread to exit */
    bool zero_page;     /* last page compressed was all-zero */
    QEMUFile *file;     /* buffer file the thread compresses into */
    QemuMutex mutex;    /* protects block/offset handoff */
    QemuCond cond;      /* signalled when new work is posted */
    RAMBlock *block;    /* block of the page to compress; NULL = no work */
    ram_addr_t offset;  /* offset of the page within @block */

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf; /* stable copy of the page fed to zlib */
};
typedef struct CompressParam CompressParam;

/* Per-thread state for a RAM decompression worker (destination side). */
struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;          /* destination host address for the page */
    uint8_t *compbuf;   /* compressed input data */
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/*
 * comp_done_cond is used to wake up the migration thread when a
 * compression thread finishes a page; protected by comp_done_lock.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);
454
/*
 * Compression worker thread body.  Waits on param->cond for work posted via
 * set_compress_params() (a non-NULL param->block), compresses the page into
 * param->file, then publishes completion under comp_done_lock and signals
 * comp_done_cond.  Exits when param->quit is set.
 *
 * Lock ordering note: param->mutex is dropped while compressing, and
 * comp_done_lock is taken without holding param->mutex.
 */
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            /* Take the work item and clear the slot before unlocking. */
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
488
/*
 * Tear down all compression worker threads and their resources.  Safe to
 * call when setup failed part-way: a slot with a NULL file marks the first
 * never-initialized thread, so cleanup stops there.
 */
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as a indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        /* Ask the thread to quit, then reap it. */
        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
527
/*
 * Create the compression worker threads and their per-thread state.
 * Returns 0 on success, -1 on failure (in which case everything created so
 * far is undone via compress_threads_save_cleanup()).
 */
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /*
         * comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.  Setting it last also marks this slot as
         * fully initialized (see compress_threads_save_cleanup()).
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
570
571
572
573
574
575
576
577
578
579
580
581
582
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 * (idstr); subsequent pages of the same block set RAM_SAVE_FLAG_CONTINUE
 * and omit it.
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
603
604
605
606
607
608
609
610
611
612
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 *
 * @bytes_dirty_period: bytes dirtied in the current sync period
 * @bytes_dirty_threshold: dirty-byte threshold that triggered throttling
 */
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /*
             * Tail-slow mode: instead of a fixed increment, compute the
             * CPU share that would bring the dirty rate down to the
             * threshold, and never increase by more than pct_increment.
             */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                        bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}
643
/* Reset the auto-converge accounting window to "now". */
void mig_throttle_counter_reset(void)
{
    RAMState *rs = ram_state;

    rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    rs->num_dirty_pages_period = 0;
    rs->bytes_xfer_prev = ram_counters.transferred;
}
652
653
654
655
656
657
658
659
660
661
662
663
664
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * If the page we are going to send is a zero page, we insert the
 * canonical zero page into the cache instead so that a later XBZRLE pass
 * can delta against it.
 *
 * @rs: current RAM state
 * @current_addr: addr of the page
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (!rs->xbzrle_enabled) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}
676
677#define ENCODING_FLAG_XBZRLE 0x1
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal, or the page
 *          could not be handled (cache miss) — caller sends it normally
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents; may be
 *                redirected to the cached copy so later writes go there
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates
     * 2.5KB result, second page encoding generates 500B result. If we
     * calculate the encoding rate only when 2 pages are mixed together, the
     * rate is 8KB/3KB. But if we calculate the rate as encoded data only,
     * the rate can tell more accurately.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}
779
780
781
782
783
784
785
786
787
788
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Returns the page number (within @rb) of the next dirty page at or after
 * @start, or the block's page count if none (also returned for ignored
 * blocks).
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    return find_next_bit(bitmap, size, start);
}
802
/*
 * Lazily clear the KVM/memory-region dirty bitmap for the clear_bmap chunk
 * containing @page, the first time any page of that chunk is processed.
 * No-op when the block has no clear_bmap or the chunk was already cleared.
 */
static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
     * can make things easier sometimes since then start address
     * of the small chunk will always be 64 pages aligned so the
     * bitmap will always be aligned to unsigned long. We should
     * even be able to remove this restriction but I'm simply
     * keeping it.
     */
    assert(shift >= 6);

    size = 1ULL << (TARGET_PAGE_BITS + shift);
    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}
829
/*
 * Clear the memory-region dirty bitmap for every clear_bmap chunk that
 * overlaps the page range [@start, @start + @npages).
 */
static void
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
    unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);

    /*
     * Clear pages from start to start + npages - 1, so the end boundary is
     * exclusive.
     */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rb, i);
    }
}
847
848
849
850
851
852
853
854
855
856
857
858
/**
 * colo_bitmap_find_dirty: find next dirty pages for colo migrate
 *
 * Returns the page offset within memory region of the start of the contiguous
 * dirty run found
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 * @num: the number of contiguous dirty pages found (0 if none)
 */
static inline
unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                     unsigned long start, unsigned long *num)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long first, next;

    *num = 0;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    first = find_next_bit(bitmap, size, start);
    if (first >= size) {
        return first;
    }
    /* The run extends to the first clear bit after the first dirty bit. */
    next = find_next_zero_bit(bitmap, size, first + 1);
    assert(next >= first);
    *num = next - first;
    return first;
}
882
/*
 * Clear the dirty bit of @page in @rb's migration bitmap, keeping the
 * migration_dirty_pages counter in sync.  Returns whether the bit was
 * previously set.
 */
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    /*
     * Clear dirty bitmap if needed.  This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time.  So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    migration_clear_memory_region_dirty_bitmap(rb, page);

    ret = test_and_clear_bit(page, rb->bmap);
    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}
906
/*
 * RamDiscardManager replay callback: clear the migration dirty bitmap for a
 * discarded section of a RAMBlock and accumulate (into *opaque, a uint64_t)
 * how many dirty bits were dropped.
 */
static void dirty_bitmap_clear_section(MemoryRegionSection *section,
                                       void *opaque)
{
    const hwaddr offset = section->offset_within_region;
    const hwaddr size = int128_get64(section->size);
    const unsigned long start = offset >> TARGET_PAGE_BITS;
    const unsigned long npages = size >> TARGET_PAGE_BITS;
    RAMBlock *rb = section->mr->ram_block;
    uint64_t *cleared_bits = opaque;

    /*
     * We don't grab ram_state->bitmap_mutex because we expect to run
     * only when starting migration or during postcopy recovery where
     * we don't have concurrent access.
     */
    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
    }
    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
    bitmap_clear(rb->bmap, start, npages);
}
928
929
930
931
932
933
934
935
936
937
938
939
940
941
/*
 * Exclude all dirty pages from migration that fall into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region of
 * the RAMBlock.
 *
 * Discarded pages ("logically unplugged") have undefined content and must
 * not get migrated, because even reading these pages for migration might
 * result in undesired behavior.
 *
 * Returns the number of cleared bits in the RAMBlock dirty bitmap.
 */
static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
{
    uint64_t cleared_bits = 0;

    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = int128_make64(qemu_ram_get_used_length(rb)),
        };

        ram_discard_manager_replay_discarded(rdm, &section,
                                             dirty_bitmap_clear_section,
                                             &cleared_bits);
    }
    return cleared_bits;
}
960
961
962
963
964
965
966
/*
 * Check if a host-page-aligned page falls into a discarded range as managed
 * by a RamDiscardManager responsible for the mapped memory region of the
 * RAMBlock.  Returns false when no RamDiscardManager covers the block.
 */
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
{
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = start,
            .size = int128_make64(qemu_ram_pagesize(rb)),
        };

        return !ram_discard_manager_is_populated(rdm, &section);
    }
    return false;
}
981
982
/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    /* Pull newly dirtied pages into rb->bmap and update the counters. */
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}
991
992
993
994
995
996
997
998
999
1000
1001uint64_t ram_pagesize_summary(void)
1002{
1003 RAMBlock *block;
1004 uint64_t summary = 0;
1005
1006 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1007 summary |= block->page_size;
1008 }
1009
1010 return summary;
1011}
1012
/* Total pages transferred so far across all mechanisms: normal, zero
 * (duplicate), compressed, and xbzrle pages. */
uint64_t ram_get_total_transferred_pages(void)
{
    return  ram_counters.normal + ram_counters.duplicate +
                compression_counters.pages + xbzrle_counters.pages;
}
1018
/*
 * Recompute the per-period rate statistics (dirty page rate, xbzrle cache
 * miss/encoding rates, compression busy/compression rates) at the end of a
 * bitmap-sync period ending at @end_time.
 */
static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}
1070
/*
 * Decide whether to throttle the guest CPU (auto-converge) based on how
 * many bytes were dirtied vs transferred during the last sync period.
 */
static void migration_trigger_throttle(RAMState *rs)
{
    MigrationState *s = migrate_get_current();
    uint64_t threshold = s->parameters.throttle_trigger_threshold;

    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;

    /* During block migration the auto-converge logic incorrectly detects
     * that ram migration makes no progress. Avoid this by disabling the
     * throttling logic during the bulk phase of block migration. */
    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
        /* The following detection logic can be refined later. For now:
           Check to see if the ratio between dirtied bytes and the approx.
           amount of bytes that just got transferred since the last time
           we were in this routine reaches the threshold. If that happens
           twice, start or increase throttling. */

        if ((bytes_dirty_period > bytes_dirty_threshold) &&
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}
1099
/*
 * Synchronize the migration dirty bitmap with the global dirty log for all
 * non-ignored blocks, then update per-period statistics and possibly
 * trigger throttling (at most once per second).
 */
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 millisecons */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}
1145
/*
 * Bitmap sync wrapper for precopy: fires the BEFORE/AFTER_BITMAP_SYNC
 * precopy notifications around migration_bitmap_sync().  Notifier errors
 * are reported but do not abort the sync.
 */
static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration, so we
     * don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is not
 * a zero page
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        /* single marker byte following the header */
        qemu_put_byte(file, 0);
        len += 1;
    }
    return len;
}
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1201{
1202 int len = save_zero_page_to_file(rs, rs->f, block, offset);
1203
1204 if (len) {
1205 ram_counters.duplicate++;
1206 ram_counters.transferred += len;
1207 return 1;
1208 }
1209 return -1;
1210}
1211
/*
 * Discard @pages pages of guest RAM starting at @offset in block @rbname,
 * but only when the release-ram capability is on and we are in postcopy.
 */
static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
}
1220
1221
1222
1223
1224
1225
1226
1227
/*
 * @pages: the number of pages written by the control path,
 *        < 0 - error
 *        > 0 - number of pages written
 *
 * Return true if the pages has been saved, otherwise false is returned.
 */
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    /*
     * NOTE(review): bytes_xmit is unsigned, so the "== 0" branch is simply
     * the complement of "> 0"; a delayed transfer skips both counters.
     */
    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1271 uint8_t *buf, bool async)
1272{
1273 ram_counters.transferred += save_page_header(rs, rs->f, block,
1274 offset | RAM_SAVE_FLAG_PAGE);
1275 if (async) {
1276 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1277 migrate_release_ram() &
1278 migration_in_postcopy());
1279 } else {
1280 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1281 }
1282 ram_counters.transferred += TARGET_PAGE_SIZE;
1283 ram_counters.normal++;
1284 return 1;
1285}
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written;
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: current page search status
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (rs->xbzrle_enabled && !migration_in_postcopy()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}
1333
1334static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1335 ram_addr_t offset)
1336{
1337 if (multifd_queue_page(rs->f, block, offset) < 0) {
1338 return -1;
1339 }
1340 ram_counters.normal++;
1341
1342 return 1;
1343}
1344
/*
 * Compress one page into @f (a per-thread buffer file).  If the page is all
 * zero it is sent as a zero page instead.  Returns whether the page turned
 * out to be a zero page.  Runs in a compression worker thread.
 */
static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
    bool zero_page = false;
    int ret;

    if (save_zero_page_to_file(rs, f, block, offset)) {
        zero_page = true;
        goto exit;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * copy it to a internal buffer to avoid it being modified by VM
     * so that we can catch up the error during compression and
     * decompression
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
        return false;
    }

exit:
    ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    return zero_page;
}
1377
/*
 * Account the bytes flushed from one compression thread: zero pages count
 * as duplicates, compressed pages update the compression statistics.
 */
static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_counters.transferred += bytes_xmit;

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}
1392
1393static bool save_page_use_compression(RAMState *rs);
1394
/*
 * Wait for every compression thread to finish its current page, then move
 * the buffered compressed data of each thread onto the migration stream.
 */
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * it's safe to fetch zero_page without holding comp_done_lock
             * as done is the last queued operation which updates
             * zero_page under comp_done_lock.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
1426
/* Post a new work item (page at @offset in @block) to a compression
 * thread.  Caller must hold param->mutex. */
static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}
1433
/*
 * Hand one page to an idle compression thread, first flushing that thread's
 * previously buffered output to the stream.  Returns 1 if a thread took the
 * page, -1 if all threads were busy and compress-wait-thread is off.
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            /* Flush the thread's output from the previous page first. */
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * wait for the free thread if the user specifies 'compress-wait-thread',
     * otherwise we will post the page out in the main thread as normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns true if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (!offset_in_ramblock(pss->block,
                            ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /*
             * If memory migration starts over, we will meet a dirtied page
             * which may still exists in compression threads's ring, so we
             * should flush the compressed data to make sure the new page
             * is not overwritten by the old one in the destination.
             *
             * Also If xbzrle is on, stop using the data compression at this
             * point. In theory, xbzrle can do better than compression.
             */
            flush_compressed_data(rs);

            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            /* After the first round, enable XBZRLE. */
            if (migrate_use_xbzrle()) {
                rs->xbzrle_enabled = true;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
/**
 * unqueue_page: pop one page request off the source-page request queue
 *
 * Returns the RAMBlock of the request (or NULL if the queue is empty);
 * the page offset within that block is returned through @offset.
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    /* Cheap unlocked peek first: the queue is usually empty in precopy */
    if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
        return NULL;
    }

    QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
    /* Re-check under the lock; the peek above was racy */
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
            QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            /* Consume one page; leave the remainder of the request queued */
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            /* Request fully consumed: drop the ref taken at queue time */
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
            migration_consume_urgent_request();
        }
    }

    return block;
}
1567
1568#if defined(__linux__)
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
/**
 * poll_fault_page: try to get the next UFFD write-fault page and, if a
 * pending fault is found, return its RAM block and page offset.
 *
 * Returns pointer to the RAMBlock containing the faulting page,
 * NULL if no write faults are pending.
 *
 * @rs: current RAM state
 * @offset: used to return the page offset within the block
 */
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    struct uffd_msg uffd_msg;
    void *page_address;
    RAMBlock *block;
    int res;

    /* Write tracking is only active for background snapshots */
    if (!migrate_background_snapshot()) {
        return NULL;
    }

    /* Non-blocking read of at most one pending fault event */
    res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
    if (res <= 0) {
        return NULL;
    }

    page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
    block = qemu_ram_block_from_host(page_address, false, offset);
    /* Faults can only come from ranges we write-protected ourselves */
    assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
    return block;
}
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
/**
 * ram_save_release_protection: release UFFD write protection on a run of
 * pages after they have been saved.
 *
 * Returns 0 on success, negative value in case of an error.
 *
 * @rs: current RAM state
 * @pss: page-search-status structure (pss->page is the last saved page)
 * @start_page: index of the first page of the run, relative to pss->block
 */
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
                                       unsigned long start_page)
{
    int res = 0;

    /* Only act on pages from a UFFD-write-protected region */
    if (pss->block->flags & RAM_UF_WRITEPROTECT) {
        void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
        uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;

        /* Flush buffered output before un-protecting, so the saved copy
         * is on the wire before the guest can modify the pages again. */
        qemu_fflush(rs->f);
        /* Un-protect the memory range */
        res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
                                     false, false);
    }

    return res;
}
1630
1631
1632
1633
1634
1635bool ram_write_tracking_available(void)
1636{
1637 uint64_t uffd_features;
1638 int res;
1639
1640 res = uffd_query_features(&uffd_features);
1641 return (res == 0 &&
1642 (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
1643}
1644
1645
1646
1647
1648
1649
/*
 * ram_write_tracking_compatible: check whether the guest memory layout is
 * compatible with UFFD-WP write tracking, by trial-registering every
 * migratable RAM block on a throwaway userfaultfd.
 *
 * Returns true if compatible, false otherwise.
 */
bool ram_write_tracking_compatible(void)
{
    const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
    int uffd_fd;
    RAMBlock *block;
    bool ret = false;

    /* Open a temporary UFFD file descriptor (no SIGBUS/non-blocking) */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
    if (uffd_fd < 0) {
        return false;
    }

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        uint64_t uffd_ioctls;

        /* Read-only and ROM-device regions never fault on write; skip */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }
        /* Try to register block memory via UFFD-IO to track writes */
        if (uffd_register_memory(uffd_fd, block->host, block->max_length,
                                 UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
            goto out;
        }
        /* The kernel must support the write-protect ioctl on this range */
        if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
            goto out;
        }
    }
    ret = true;

out:
    uffd_close_fd(uffd_fd);
    return ret;
}
1687
1688static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
1689 ram_addr_t size)
1690{
1691
1692
1693
1694
1695
1696
1697 for (; offset < size; offset += block->page_size) {
1698 char tmp = *((char *)block->host + offset);
1699
1700
1701 asm volatile("" : "+r" (tmp));
1702 }
1703}
1704
1705static inline int populate_read_section(MemoryRegionSection *section,
1706 void *opaque)
1707{
1708 const hwaddr size = int128_get64(section->size);
1709 hwaddr offset = section->offset_within_region;
1710 RAMBlock *block = section->mr->ram_block;
1711
1712 populate_read_range(block, offset, size);
1713 return 0;
1714}
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725static void ram_block_populate_read(RAMBlock *rb)
1726{
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738 if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
1739 RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
1740 MemoryRegionSection section = {
1741 .mr = rb->mr,
1742 .offset_within_region = 0,
1743 .size = rb->mr->size,
1744 };
1745
1746 ram_discard_manager_replay_populated(rdm, §ion,
1747 populate_read_section, NULL);
1748 } else {
1749 populate_read_range(rb, 0, rb->used_length);
1750 }
1751}
1752
1753
1754
1755
/*
 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking.
 */
void ram_write_tracking_prepare(void)
{
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Read-only and ROM-device regions never fault on write; skip */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /*
         * Populate pages of the RAM block before enabling userfault_fd
         * write protection.
         *
         * This stage tolerates writes to the pages, so it should be done
         * before the actual memory protection is applied: otherwise a
         * populating read would itself fault under WP.
         */
        ram_block_populate_read(block);
    }
}
1779
1780
1781
1782
1783
1784
/*
 * ram_write_tracking_start: start UFFD-WP memory tracking.
 *
 * Registers and write-protects every migratable, writable RAM block.
 * On any failure, unwinds every block that was already protected.
 *
 * Returns 0 for success or negative value in case of error.
 */
int ram_write_tracking_start(void)
{
    int uffd_fd;
    RAMState *rs = ram_state;
    RAMBlock *block;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
    if (uffd_fd < 0) {
        return uffd_fd;
    }
    rs->uffdio_fd = uffd_fd;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Read-only and ROM-device regions never fault on write; skip */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /* Register block memory with UFFD to track writes */
        if (uffd_register_memory(rs->uffdio_fd, block->host,
                block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
            goto fail;
        }

        /* Apply UFFD write protection to the block memory range */
        if (uffd_change_protection(rs->uffdio_fd, block->host,
                block->max_length, true, false)) {
            goto fail;
        }
        /* Mark the block so the unwind path / stop path can find it */
        block->flags |= RAM_UF_WRITEPROTECT;
        memory_region_ref(block->mr);

        trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
                block->host, block->max_length);
    }

    return 0;

fail:
    error_report("ram_write_tracking_start() failed: restoring initial memory state");

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /*
         * Unwind every block that was successfully protected before the
         * failure: remove write protection, then unregister from UFFD.
         */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
        /* Clear the flag and drop the reference taken above */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    uffd_close_fd(uffd_fd);
    rs->uffdio_fd = -1;
    return -1;
}
1848
1849
1850
1851
/*
 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove
 * protection from all blocks that ram_write_tracking_start() covered.
 */
void ram_write_tracking_stop(void)
{
    RAMState *rs = ram_state;
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Only blocks flagged by ram_write_tracking_start() */
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /* Remove write protection, then unregister from UFFD */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);

        trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
                block->host, block->max_length);

        /* Clear the flag and drop the reference taken at start */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    /* Finally close the UFFD file descriptor */
    uffd_close_fd(rs->uffdio_fd);
    rs->uffdio_fd = -1;
}
1880
1881#else
1882
1883
1884static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1885{
1886 (void) rs;
1887 (void) offset;
1888
1889 return NULL;
1890}
1891
1892static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1893 unsigned long start_page)
1894{
1895 (void) rs;
1896 (void) pss;
1897 (void) start_page;
1898
1899 return 0;
1900}
1901
/* Write tracking requires Linux userfaultfd; unavailable elsewhere. */
bool ram_write_tracking_available(void)
{
    return false;
}
1906
/* Must never be called: callers check ram_write_tracking_available() first. */
bool ram_write_tracking_compatible(void)
{
    assert(0);
    return false;
}
1912
/* Must never be called: callers check ram_write_tracking_available() first. */
int ram_write_tracking_start(void)
{
    assert(0);
    return -1;
}
1918
/* Must never be called: callers check ram_write_tracking_available() first. */
void ram_write_tracking_stop(void)
{
    assert(0);
}
1923#endif
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
/**
 * get_queued_page: unqueue a page from the postcopy request queue (or,
 * for background snapshots, poll for a write-fault page).
 *
 * Skips pages that are already sent (!dirty).
 *
 * Returns true if a queued page is found; on success, updates @pss to
 * point the background scan at the queued page.
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * Only take a page whose dirty bit is still set: a clear bit
         * means the background scan already sent it, and re-sending
         * would waste bandwidth.  (dirty is only read when block is
         * non-NULL, so the loop condition's short-circuit is safe.)
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                                                page);
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (!block) {
        /*
         * Poll write faults too if background snapshot is enabled; that's
         * when we have vcpus blocked on write-protected pages.
         */
        block = poll_fault_page(rs, &offset);
    }

    if (block) {
        /*
         * We want the background search to continue from the queued page,
         * since the guest is likely to want other pages near the page it
         * just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;

        /*
         * This unqueued page would break the "one round" check, even if
         * it's rare.
         */
        pss->complete_round = false;
    }

    return !!block;
}
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999static void migration_page_queue_free(RAMState *rs)
2000{
2001 struct RAMSrcPageRequest *mspr, *next_mspr;
2002
2003
2004
2005 RCU_READ_LOCK_GUARD();
2006 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
2007 memory_region_unref(mspr->rb->mr);
2008 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
2009 g_free(mspr);
2010 }
2011}
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
/**
 * ram_save_queue_pages: queue a range of pages for transmission ahead of
 * the regular scan (used by postcopy page requests).
 *
 * A request to discard a page or a range of pages has been queued from
 * postcopy; tell the source to send them urgently.
 *
 * Returns zero on success or negative on error.
 *
 * @rbname: name of the RAMBlock of the request; NULL means the same block
 *          as the last request
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    RCU_READ_LOCK_GUARD();

    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen: the destination isn't expected to ask for
             * pages before we've sent at least one block name.
             */
            error_report("ram_save_queue_pages no previous block");
            return -1;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            return -1;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (!offset_in_ramblock(ramblock, start + len - 1)) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        return -1;
    }

    struct RAMSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct RAMSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    /* Ref the block so it can't vanish while the request is queued */
    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    migration_make_urgent_request();
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;
}
2077
2078static bool save_page_use_compression(RAMState *rs)
2079{
2080 if (!migrate_use_compression()) {
2081 return false;
2082 }
2083
2084
2085
2086
2087
2088
2089 if (rs->xbzrle_enabled) {
2090 return false;
2091 }
2092
2093 return true;
2094}
2095
2096
2097
2098
2099
2100
/*
 * Try to compress the page before posting it out; return true if the
 * page has been properly compressed, otherwise false so the caller
 * falls back to the normal page path.
 */
static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    if (!save_page_use_compression(rs)) {
        return false;
    }

    /*
     * When starting the process of a new block, the first page of
     * the block should be sent out before other pages in the same
     * block, and all the pages in the last block should have been sent
     * out.  Keeping this order is important, because the 'cont' flag
     * is used to avoid resending the block name.
     *
     * We post the first page as a normal page as compression will take
     * much CPU resource.
     */
    if (block != rs->last_sent_block) {
        flush_compressed_data(rs);
        return false;
    }

    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
        return true;
    }

    /* No compression thread was free; account it and fall back */
    compression_counters.busy++;
    return false;
}
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
                                bool last_stage)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    int res;

    /* RDMA or another transport may take over the page entirely */
    if (control_save_page(rs, block, offset, &res)) {
        return res;
    }

    if (save_compress_page(rs, block, offset)) {
        return 1;
    }

    res = save_zero_page(rs, block, offset);
    if (res > 0) {
        /*
         * Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale.
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        }
        ram_release_pages(block->idstr, offset, res);
        return res;
    }

    /*
     * Do not use multifd for:
     * 1. Compression as the first page in the new block should be posted out
     *    before sending the compressed page
     * 2. In postcopy as one whole host page should be placed
     */
    if (!save_page_use_compression(rs) && migrate_use_multifd()
        && !migration_in_postcopy()) {
        return ram_save_multifd_page(rs, block, offset);
    }

    return ram_save_page(rs, pss, last_stage);
}
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at *page keep saving until we hit the end of a host page or
 * the end of the block, so that on the destination a whole host page
 * (possibly a huge page) is placed atomically.
 *
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
                              bool last_stage)
{
    int tmppages, pages = 0;
    /* Number of target pages per host page */
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
    unsigned long hostpage_boundary =
        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
    unsigned long start_page = pss->page;
    int res;

    if (ramblock_is_ignored(pss->block)) {
        error_report("block %s should not be migrated !", pss->block->idstr);
        return 0;
    }

    do {
        /* Check if the page is dirty and, if so, clear the bit and send it */
        if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
            tmppages = ram_save_target_page(rs, pss, last_stage);
            if (tmppages < 0) {
                return tmppages;
            }

            pages += tmppages;
            /*
             * Allow rate limiting to happen in the middle of huge pages if
             * something is sent in the current iteration.
             */
            if (pagesize_bits > 1 && tmppages > 0) {
                migration_rate_limit();
            }
        }
        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    } while ((pss->page < hostpage_boundary) &&
             offset_in_ramblock(pss->block,
                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
    /* The offset we leave with is the min boundary of host page and block */
    pss->page = MIN(pss->page, hostpage_boundary) - 1;

    res = ram_save_release_protection(rs, pss, start_page);
    return (res < 0 ? res : pages);
}
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns the number of pages written where zero means no dirty pages,
 * or negative on error
 *
 * @rs: current RAM state
 * @last_stage: if we are at the completion stage
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */
static int ram_find_and_save_block(RAMState *rs, bool last_stage)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {
        return pages;
    }

    /* Resume the scan from where the previous call left off */
    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        /* Urgent (queued / faulted) pages take priority over the scan */
        found = get_queued_page(rs, &pss);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(rs, &pss, &again);
        }

        if (found) {
            pages = ram_save_host_page(rs, &pss, last_stage);
        }
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;

    return pages;
}
2297
2298void acct_update_position(QEMUFile *f, size_t size, bool zero)
2299{
2300 uint64_t pages = size / TARGET_PAGE_SIZE;
2301
2302 if (zero) {
2303 ram_counters.duplicate += pages;
2304 } else {
2305 ram_counters.normal += pages;
2306 ram_counters.transferred += size;
2307 qemu_update_position(f, size);
2308 }
2309}
2310
/*
 * Sum the used length of all migratable RAM blocks.
 *
 * @count_ignored: if true, also include "ignored" (shared) blocks.
 */
static uint64_t ram_bytes_total_common(bool count_ignored)
{
    RAMBlock *block;
    uint64_t total = 0;

    RCU_READ_LOCK_GUARD();

    if (count_ignored) {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            total += block->used_length;
        }
    } else {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            total += block->used_length;
        }
    }
    return total;
}
2329
/* Total bytes of RAM to migrate, excluding ignored (shared) blocks. */
uint64_t ram_bytes_total(void)
{
    return ram_bytes_total_common(false);
}
2334
/* Allocate the destination-side scratch buffer for XBZRLE decoding. */
static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
2339
/* Free the XBZRLE decode buffer; g_free(NULL) is a no-op, so this is
 * safe even if xbzrle_load_setup() never ran. */
static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}
2345
2346static void ram_state_cleanup(RAMState **rsp)
2347{
2348 if (*rsp) {
2349 migration_page_queue_free(*rsp);
2350 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2351 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2352 g_free(*rsp);
2353 *rsp = NULL;
2354 }
2355}
2356
2357static void xbzrle_cleanup(void)
2358{
2359 XBZRLE_cache_lock();
2360 if (XBZRLE.cache) {
2361 cache_fini(XBZRLE.cache);
2362 g_free(XBZRLE.encoded_buf);
2363 g_free(XBZRLE.current_buf);
2364 g_free(XBZRLE.zero_target_page);
2365 XBZRLE.cache = NULL;
2366 XBZRLE.encoded_buf = NULL;
2367 XBZRLE.current_buf = NULL;
2368 XBZRLE.zero_target_page = NULL;
2369 }
2370 XBZRLE_cache_unlock();
2371}
2372
/* Savevm cleanup handler: stop dirty logging, free bitmaps and state. */
static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* We don't use dirty log with background snapshots */
    if (!migrate_background_snapshot()) {
        /* caller has held iothread lock or is in a bh, so there is
         * no writing race against the migration bitmap
         */
        if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
            /*
             * do not stop dirty log without starting it, since
             * memory_global_dirty_log_stop will assert that
             * memory_global_dirty_log_start/stop are used in pairs
             */
            memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
        }
    }

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->clear_bmap);
        block->clear_bmap = NULL;
        g_free(block->bmap);
        block->bmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}
2404
/* Reset the scan state so the next iteration starts from the beginning. */
static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    /* XBZRLE is re-enabled only after the first full round completes */
    rs->xbzrle_enabled = false;
}
2413
2414#define MAX_WAIT 50
2415
2416
2417
2418
2419
2420
/**
 * ram_debug_dump_bitmap: dump a bitmap to stderr, 128 bits per line,
 * printing only lines that contain a bit differing from @expected.
 *
 * @todump: bitmap to dump
 * @expected: the expected (common) bit value; lines that are entirely
 *            @expected are suppressed
 * @pages: number of bits in the bitmap
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining bits in the bitmap
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
2449
2450
2451
/*
 * Free (discard on the source) every page that has already been sent to
 * the destination: on the bitmap, a clear bit means the page was sent,
 * so each run of zero bits becomes one discard range.
 */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr,
                              ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
                              ((ram_addr_t)(run_end - run_start))
                                << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
2483{
2484 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2485 unsigned long current;
2486 unsigned long *bitmap = block->bmap;
2487
2488 for (current = 0; current < end; ) {
2489 unsigned long one = find_next_bit(bitmap, end, current);
2490 unsigned long zero, discard_length;
2491
2492 if (one >= end) {
2493 break;
2494 }
2495
2496 zero = find_next_zero_bit(bitmap, end, one + 1);
2497
2498 if (zero >= end) {
2499 discard_length = end - one;
2500 } else {
2501 discard_length = zero - one;
2502 }
2503 postcopy_discard_send_range(ms, one, discard_length);
2504 current = one + discard_length;
2505 }
2506
2507 return 0;
2508}
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523static int postcopy_each_ram_send_discard(MigrationState *ms)
2524{
2525 struct RAMBlock *block;
2526 int ret;
2527
2528 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2529 postcopy_discard_send_init(ms, block->idstr);
2530
2531
2532
2533
2534
2535
2536 ret = postcopy_send_discard_bm_ram(ms, block);
2537 postcopy_discard_send_finish(ms);
2538 if (ret) {
2539 return ret;
2540 }
2541 }
2542
2543 return 0;
2544}
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
/**
 * postcopy_chunk_hostpages_pass: canonicalize the bitmap in hostpages
 *
 * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
 * inverted.
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix.  This function canonicalizes the bitmaps.
 *
 * @ms: current migration state
 * @block: block that contains the page we want to canonicalize
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    /* Find a dirty page */
    run_start = find_next_bit(bitmap, pages, 0);

    while (run_start < pages) {
        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
            /* Find the end of this run */
            run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
        }

        if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
            unsigned long page;
            unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
                                                             host_ratio);
            run_start = QEMU_ALIGN_UP(run_start, host_ratio);

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        /* Find the next dirty page for the next iteration */
        run_start = find_next_bit(bitmap, pages, run_start);
    }
}
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2628{
2629 postcopy_discard_send_init(ms, block->idstr);
2630
2631
2632
2633
2634 postcopy_chunk_hostpages_pass(ms, block);
2635
2636 postcopy_discard_send_finish(ms);
2637 return 0;
2638}
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Transmit the set of pages to be discarded after precopy to the target;
 * these are pages that:
 *     a) have been previously transmitted but are now dirty again
 *     b) are partially dirty host pages (huge-page fixups)
 *
 * Returns zero on success.
 *
 * @ms: current migration state
 */
int ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    RAMState *rs = ram_state;
    RAMBlock *block;
    int ret;

    RCU_READ_LOCK_GUARD();

    /* This should be our last sync; the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Deal with TPS != HPS and huge pages */
        ret = postcopy_chunk_hostpages(ms, block);
        if (ret) {
            return ret;
        }

#ifdef DEBUG_POSTCOPY
        ram_debug_dump_bitmap(block->bmap, true,
                              block->used_length >> TARGET_PAGE_BITS);
#endif
    }
    trace_ram_postcopy_send_discard_bitmap();

    return postcopy_each_ram_send_discard(ms);
}
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
/**
 * ram_discard_range: discard dirtied pages at the beginning of postcopy
 *
 * Returns zero on success
 *
 * @rbname: name of the RAMBlock of the request. NULL means the
 *          same that last one.
 * @start: RAMBlock starting page
 * @length: RAMBlock size
 */
int ram_discard_range(const char *rbname, uint64_t start, size_t length)
{
    trace_ram_discard_range(rbname, start, length);

    RCU_READ_LOCK_GUARD();
    RAMBlock *rb = qemu_ram_block_by_name(rbname);

    if (!rb) {
        error_report("ram_discard_range: Failed to find block '%s'", rbname);
        return -1;
    }

    /*
     * On source VM, we don't need to update the received bitmap since
     * we don't even have one.
     */
    if (rb->receivedmap) {
        bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
                     length >> qemu_target_page_bits());
    }

    return ram_block_discard_range(rb, start, length);
}
2721
2722
2723
2724
2725
/*
 * For every allocation, we will try not to crash the VM if the
 * allocation failed.
 *
 * Allocates all XBZRLE sender-side resources under the cache lock,
 * unwinding in reverse order on failure.  Returns 0 or -ENOMEM.
 */
static int xbzrle_init(void)
{
    Error *local_err = NULL;

    if (!migrate_use_xbzrle()) {
        return 0;
    }

    XBZRLE_cache_lock();

    XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.zero_target_page) {
        error_report("%s: Error allocating zero page", __func__);
        goto err_out;
    }

    XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
                              TARGET_PAGE_SIZE, &local_err);
    if (!XBZRLE.cache) {
        error_report_err(local_err);
        goto free_zero_page;
    }

    XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.encoded_buf) {
        error_report("%s: Error allocating encoded_buf", __func__);
        goto free_cache;
    }

    XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
    if (!XBZRLE.current_buf) {
        error_report("%s: Error allocating current_buf", __func__);
        goto free_encoded_buf;
    }

    /* We are all good */
    XBZRLE_cache_unlock();
    return 0;

/* Error-unwind ladder: free in reverse order of allocation */
free_encoded_buf:
    g_free(XBZRLE.encoded_buf);
    XBZRLE.encoded_buf = NULL;
free_cache:
    cache_fini(XBZRLE.cache);
    XBZRLE.cache = NULL;
free_zero_page:
    g_free(XBZRLE.zero_target_page);
    XBZRLE.zero_target_page = NULL;
err_out:
    XBZRLE_cache_unlock();
    return -ENOMEM;
}
2778
2779static int ram_state_init(RAMState **rsp)
2780{
2781 *rsp = g_try_new0(RAMState, 1);
2782
2783 if (!*rsp) {
2784 error_report("%s: Init ramstate fail", __func__);
2785 return -1;
2786 }
2787
2788 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2789 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2790 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2791
2792
2793
2794
2795
2796
2797 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2798 ram_state_reset(*rsp);
2799
2800 return 0;
2801}
2802
/* Allocate and initialize dirty/clear bitmaps for every migratable block. */
static void ram_list_init_bitmaps(void)
{
    MigrationState *ms = migrate_get_current();
    RAMBlock *block;
    unsigned long pages;
    uint8_t shift;

    /* Skip setting bitmap if there is no RAM */
    if (ram_bytes_total()) {
        /* Clamp the configured clear-bitmap shift into the valid range */
        shift = ms->clear_bitmap_shift;
        if (shift > CLEAR_BITMAP_SHIFT_MAX) {
            error_report("clear_bitmap_shift (%u) too big, using "
                         "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
            shift = CLEAR_BITMAP_SHIFT_MAX;
        } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
            error_report("clear_bitmap_shift (%u) too small, using "
                         "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
            shift = CLEAR_BITMAP_SHIFT_MIN;
        }

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            pages = block->max_length >> TARGET_PAGE_BITS;
            /*
             * The initial dirty bitmap for migration must be set with all
             * ones to make sure we'll migrate every guest RAM page to
             * destination.
             * Here we set RAMBlock.bmap all to 1 because when rebegin a
             * new migration after a failed migration, ram_list.
             * dirty_memory[DIRTY_MEMORY_MIGRATION] don't include the whole
             * guest memory.
             */
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
            block->clear_bmap_shift = shift;
            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
        }
    }
}
2841
2842static void migration_bitmap_clear_discarded_pages(RAMState *rs)
2843{
2844 unsigned long pages;
2845 RAMBlock *rb;
2846
2847 RCU_READ_LOCK_GUARD();
2848
2849 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
2850 pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
2851 rs->migration_dirty_pages -= pages;
2852 }
2853}
2854
/* Set up dirty bitmaps and start dirty-page logging (unless snapshotting). */
static void ram_init_bitmaps(RAMState *rs)
{
    /* For memory_global_dirty_log_start below.  */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    WITH_RCU_READ_LOCK_GUARD() {
        ram_list_init_bitmaps();
        /* We don't use dirty log with background snapshots */
        if (!migrate_background_snapshot()) {
            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
            migration_bitmap_sync_precopy(rs);
        }
    }
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

    /*
     * After an eventual first bitmap sync, fixup the initial bitmap
     * containing all 1s to exclude any discarded pages from migration.
     */
    migration_bitmap_clear_discarded_pages(rs);
}
2878
2879static int ram_init_all(RAMState **rsp)
2880{
2881 if (ram_state_init(rsp)) {
2882 return -1;
2883 }
2884
2885 if (xbzrle_init()) {
2886 ram_state_cleanup(rsp);
2887 return -1;
2888 }
2889
2890 ram_init_bitmaps(*rsp);
2891
2892 return 0;
2893}
2894
/* Prepare RAM state for a postcopy-resume: rebuild the dirty-page count
 * from the bitmaps and reset the scan. */
static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
{
    RAMBlock *block;
    uint64_t pages = 0;

    /*
     * Postcopy is not using xbzrle/compression, so no need for that.
     * Also, since source are already halted, we don't need to care
     * about dirty page logging as well.
     */
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        pages += bitmap_count_one(block->bmap,
                                  block->used_length >> TARGET_PAGE_BITS);
    }

    /* This may not be aligned with current bitmaps. Recalculate. */
    rs->migration_dirty_pages = pages;

    ram_state_reset(rs);

    /* Update RAMState cache of output QEMUFile */
    rs->f = out;

    trace_ram_state_resume_prepare(pages);
}
2921
2922
2923
2924
2925
2926
2927
/*
 * This function clears bits of the free pages reported by the caller from the
 * migration dirty bitmap. @addr is the host address corresponding to the
 * start of the continuous guest free pages, and @len is the total bytes of
 * those pages.
 */
void qemu_guest_free_page_hint(void *addr, size_t len)
{
    RAMBlock *block;
    ram_addr_t offset;
    size_t used_len, start, npages;
    MigrationState *s = migrate_get_current();

    /* This function is currently expected to be used during live migration */
    if (!migration_is_setup_or_active(s->state)) {
        return;
    }

    for (; len > 0; len -= used_len, addr += used_len) {
        block = qemu_ram_block_from_host(addr, false, &offset);
        if (unlikely(!block || offset >= block->used_length)) {
            /*
             * The implementation might not support RAMBlock resize during
             * live migration, but it could happen in theory with future
             * updates. So we add a check here to capture that case.
             */
            error_report_once("%s unexpected error", __func__);
            return;
        }

        /* Clamp the hinted range to the current block */
        if (len <= block->used_length - offset) {
            used_len = len;
        } else {
            used_len = block->used_length - offset;
        }

        start = offset >> TARGET_PAGE_BITS;
        npages = used_len >> TARGET_PAGE_BITS;

        qemu_mutex_lock(&ram_state->bitmap_mutex);
        /*
         * The skipped free pages are equavalent to be sent from clear_bmap's
         * perspective, so clear the bits from the memory region bitmap which
         * are initially set. Otherwise those skipped pages will be sent in
         * the next round after syncing from the memory region bitmap.
         */
        migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
        ram_state->migration_dirty_pages -=
                      bitmap_count_one_with_offset(block->bmap, start, npages);
        bitmap_clear(block->bmap, start, npages);
        qemu_mutex_unlock(&ram_state->bitmap_mutex);
    }
}
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    if (compress_threads_save_setup()) {
        return -1;
    }

    /* migration has already setup the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    WITH_RCU_READ_LOCK_GUARD() {
        /* Total size tagged with the MEM_SIZE flag heads the section */
        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);

        /* Announce each block: name, length, optional page size / address */
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            qemu_put_byte(f, strlen(block->idstr));
            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
            qemu_put_be64(f, block->used_length);
            if (migrate_postcopy_ram() && block->page_size !=
                                          qemu_host_page_size) {
                qemu_put_be64(f, block->page_size);
            }
            if (migrate_ignore_shared()) {
                qemu_put_be64(f, block->mr->addr);
            }
        }
    }

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    multifd_send_sync_main(f);
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);

    return 0;
}
3035
3036
3037
3038
3039
3040
3041
3042
3043
/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /* Avoid transferring ram during bulk phase of block migration as
         * the bulk phase will usually take a long time and transferring
         * ram updates during that time is pointless. */
        goto out;
    }

    /*
     * We'll take this lock a little bit long, but it's okay for two reasons.
     * Firstly, the only possible other thread to take it is who calls
     * qemu_guest_free_page_hint(), which should be rare; secondly, see
     * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which
     * guarantees that we'll at least release it in a regular basis.
     */
    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        if (ram_list.version != rs->last_version) {
            ram_state_reset(rs);
        }

        /* Read version before ram_list.blocks */
        smp_rmb();

        ram_control_before_iterate(f, RAM_CONTROL_ROUND);

        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        i = 0;
        /* Keep sending while under the rate limit, or while urgent
         * (postcopy) requests are pending regardless of the limit */
        while ((ret = qemu_file_rate_limit(f)) == 0 ||
               !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
            int pages;

            if (qemu_file_get_error(f)) {
                break;
            }

            pages = ram_find_and_save_block(rs, false);
            /* no more pages to sent */
            if (pages == 0) {
                done = 1;
                break;
            }

            if (pages < 0) {
                qemu_file_set_error(f, pages);
                break;
            }

            rs->target_page_count += pages;

            /*
             * During postcopy, it is necessary to make sure one whole host
             * page is sent in one chunk.
             */
            if (migrate_postcopy_ram()) {
                flush_compressed_data(rs);
            }

            /*
             * we want to check in the 1st loop, just in case it was the 1st
             * time and we had to sync the dirty bitmap.
             * qemu_clock_get_ns() is a bit expensive, so we only check each
             * some iterations
             */
            if ((i & 63) == 0) {
                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
                              1000000;
                if (t1 > MAX_WAIT) {
                    trace_ram_save_iterate_big_wait(t1, i);
                    break;
                }
            }
            i++;
        }
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    if (ret >= 0
        && migration_is_setup_or_active(migrate_get_current()->state)) {
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
        /* Account for the 8-byte EOS marker just written */
        ram_counters.transferred += 8;

        ret = qemu_file_get_error(f);
    }
    if (ret < 0) {
        return ret;
    }

    return done;
}
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
/*
 * ram_save_complete: flush every remaining dirty page and finish the stream
 *
 * Called at the end of migration (or at the postcopy switch-over); sends
 * all still-dirty pages without any rate limiting.
 *
 * @f: QEMUFile where to send the data
 * @opaque: pointer to the RAMState* registered with register_savevm_live()
 *
 * Returns 0 on success, negative on error.
 */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;

    WITH_RCU_READ_LOCK_GUARD() {
        if (!migration_in_postcopy()) {
            /* One final bitmap sync; in postcopy this already happened. */
            migration_bitmap_sync_precopy(rs);
        }

        ram_control_before_iterate(f, RAM_CONTROL_FINISH);

        /* Drain every remaining dirty page, ignoring the rate limit. */
        while (true) {
            int pages;

            /* In COLO state, pages may still be dirtied afterwards. */
            pages = ram_find_and_save_block(rs, !migration_in_colo_state());

            if (pages == 0) {
                break;
            }
            if (pages < 0) {
                ret = pages;
                break;
            }
        }

        flush_compressed_data(rs);
        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    }

    if (ret >= 0) {
        /* Sync multifd channels before terminating the section with EOS. */
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
    }

    return ret;
}
3204
3205static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
3206 uint64_t *res_precopy_only,
3207 uint64_t *res_compatible,
3208 uint64_t *res_postcopy_only)
3209{
3210 RAMState **temp = opaque;
3211 RAMState *rs = *temp;
3212 uint64_t remaining_size;
3213
3214 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3215
3216 if (!migration_in_postcopy() &&
3217 remaining_size < max_size) {
3218 qemu_mutex_lock_iothread();
3219 WITH_RCU_READ_LOCK_GUARD() {
3220 migration_bitmap_sync_precopy(rs);
3221 }
3222 qemu_mutex_unlock_iothread();
3223 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3224 }
3225
3226 if (migrate_postcopy_ram()) {
3227
3228 *res_compatible += remaining_size;
3229 } else {
3230 *res_precopy_only += remaining_size;
3231 }
3232}
3233
/*
 * load_xbzrle: read one XBZRLE-encoded page from the stream into @host
 *
 * Wire format: one flags byte, a big-endian 16-bit encoded length, then
 * that many bytes of encoded data.
 *
 * @f: QEMUFile to read from
 * @addr: guest address of the page (used by the caller for reporting)
 * @host: host pointer to the destination target page
 *
 * Returns 0 on success, -1 on a malformed or undecodable page.
 */
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    /* Header: flags byte followed by the encoded length. */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        /* Encoded data can never exceed a full page. */
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    loaded_data = XBZRLE.decoded_buf;

    /* Read the data; may point loaded_data straight into the file buffer. */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* Decode on top of the page's current contents in @host. */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
/*
 * ram_block_from_stream: resolve the RAMBlock a page in the stream refers to
 *
 * The stream sends the full block name only when it changes; subsequent
 * pages carry RAM_SAVE_FLAG_CONTINUE and reuse the previously named block,
 * which is remembered in a function-static variable.
 *
 * @f: QEMUFile to read the block name from (when not CONTINUE)
 * @flags: page flags from the stream
 *
 * Returns the RAMBlock, or NULL on a malformed stream.
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    /* Remembers the block of the previous page between calls. */
    static RAMBlock *block;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            /* CONTINUE before any block name was ever sent. */
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    /* len is a uint8_t (<= 255), so id[len] below cannot overflow id[256]. */
    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    if (ramblock_is_ignored(block)) {
        /* Blocks marked "ignored" must never appear in the RAM stream. */
        error_report("block %s should not be migrated !", id);
        return NULL;
    }

    return block;
}
3309
3310static inline void *host_from_ram_block_offset(RAMBlock *block,
3311 ram_addr_t offset)
3312{
3313 if (!offset_in_ramblock(block, offset)) {
3314 return NULL;
3315 }
3316
3317 return block->host + offset;
3318}
3319
3320static void *host_page_from_ram_block_offset(RAMBlock *block,
3321 ram_addr_t offset)
3322{
3323
3324 return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
3325 block->page_size);
3326}
3327
3328static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
3329 ram_addr_t offset)
3330{
3331 return ((uintptr_t)block->host + offset) & (block->page_size - 1);
3332}
3333
/*
 * colo_cache_from_block_offset: map a block offset into the COLO cache
 *
 * @block: RAMBlock the page belongs to
 * @offset: offset within the block
 * @record_bitmap: when true, mark the page dirty in the block's bitmap so
 *     colo_flush_ram_cache() will later copy it into guest RAM
 *
 * Returns the address inside the block's COLO cache, or NULL when the
 * offset is out of range or the cache was never allocated.
 */
static inline void *colo_cache_from_block_offset(RAMBlock *block,
                             ram_addr_t offset, bool record_bitmap)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }
    if (!block->colo_cache) {
        error_report("%s: colo_cache is NULL in block :%s",
                     __func__, block->idstr);
        return NULL;
    }

    /*
     * Only count the page once: test_and_set_bit() returns the old bit, so
     * migration_dirty_pages is bumped only on the 0 -> 1 transition.
     */
    if (record_bitmap &&
        !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
        ram_state->migration_dirty_pages++;
    }
    return block->colo_cache + offset;
}
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3369{
3370 if (ch != 0 || !is_zero_range(host, size)) {
3371 memset(host, ch, size);
3372 }
3373}
3374
3375
/*
 * qemu_uncompress_data: inflate one compressed page with a reusable stream
 *
 * @stream: zlib stream, reset before each use so it can be reused
 * @dest / @dest_len: output buffer (one target page)
 * @source / @source_len: compressed input
 *
 * Returns the number of bytes produced, or -1 on any zlib failure
 * (including input that does not decompress to a complete stream).
 */
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    if (inflateReset(stream) != Z_OK) {
        return -1;
    }

    stream->next_in = (uint8_t *)source;
    stream->avail_in = source_len;
    stream->next_out = dest;
    stream->avail_out = dest_len;

    /* A whole page is compressed at once, so we must see Z_STREAM_END. */
    if (inflate(stream, Z_NO_FLUSH) != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}
3399
3400static void *do_data_decompress(void *opaque)
3401{
3402 DecompressParam *param = opaque;
3403 unsigned long pagesize;
3404 uint8_t *des;
3405 int len, ret;
3406
3407 qemu_mutex_lock(¶m->mutex);
3408 while (!param->quit) {
3409 if (param->des) {
3410 des = param->des;
3411 len = param->len;
3412 param->des = 0;
3413 qemu_mutex_unlock(¶m->mutex);
3414
3415 pagesize = TARGET_PAGE_SIZE;
3416
3417 ret = qemu_uncompress_data(¶m->stream, des, pagesize,
3418 param->compbuf, len);
3419 if (ret < 0 && migrate_get_current()->decompress_error_check) {
3420 error_report("decompress data failed");
3421 qemu_file_set_error(decomp_file, ret);
3422 }
3423
3424 qemu_mutex_lock(&decomp_done_lock);
3425 param->done = true;
3426 qemu_cond_signal(&decomp_done_cond);
3427 qemu_mutex_unlock(&decomp_done_lock);
3428
3429 qemu_mutex_lock(¶m->mutex);
3430 } else {
3431 qemu_cond_wait(¶m->cond, ¶m->mutex);
3432 }
3433 }
3434 qemu_mutex_unlock(¶m->mutex);
3435
3436 return NULL;
3437}
3438
3439static int wait_for_decompress_done(void)
3440{
3441 int idx, thread_count;
3442
3443 if (!migrate_use_compression()) {
3444 return 0;
3445 }
3446
3447 thread_count = migrate_decompress_threads();
3448 qemu_mutex_lock(&decomp_done_lock);
3449 for (idx = 0; idx < thread_count; idx++) {
3450 while (!decomp_param[idx].done) {
3451 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3452 }
3453 }
3454 qemu_mutex_unlock(&decomp_done_lock);
3455 return qemu_file_get_error(decomp_file);
3456}
3457
/*
 * compress_threads_load_cleanup: stop and free all decompression workers
 *
 * Done in two passes: first ask every thread to quit (so they can all wind
 * down in parallel), then join and destroy each one's resources. Safe to
 * call on a partially initialized set: compbuf == NULL marks the first
 * slot that was never set up, and both passes stop there.
 */
static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    /* Pass 1: request termination from every started worker. */
    for (i = 0; i < thread_count; i++) {
        /*
         * compbuf is allocated only after a slot's stream/mutex/cond are
         * initialized, so it doubles as the "slot is live" marker.
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    /* Pass 2: join each worker and tear down its per-thread state. */
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}
3498
/*
 * compress_threads_load_setup: spawn the decompression worker threads
 *
 * @f: QEMUFile errors from workers are reported against (saved in
 *     decomp_file)
 *
 * Returns 0 on success; on failure, already-created threads and buffers
 * are torn down via compress_threads_load_cleanup() and -1 is returned.
 * A no-op (returns 0) when compression is not in use.
 */
static int compress_threads_load_setup(QEMUFile *f)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    decomp_file = f;
    for (i = 0; i < thread_count; i++) {
        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
            /* Cleanup relies on compbuf == NULL marking unstarted slots. */
            goto exit;
        }

        /* compressBound() gives the worst-case size of a compressed page. */
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].done = true;   /* idle until a page is queued */
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;
exit:
    compress_threads_load_cleanup();
    return -1;
}
3532
/*
 * decompress_data_with_multi_threads: hand one compressed page to a worker
 *
 * Scans for an idle worker, copies the compressed bytes into its buffer
 * and wakes it; if all workers are busy, sleeps on decomp_done_cond until
 * one finishes, then retries.
 *
 * @f: QEMUFile to read @len compressed bytes from
 * @host: destination host address for the decompressed page
 * @len: length of the compressed data (validated by the caller)
 */
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    QEMU_LOCK_GUARD(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                /* Claim the worker, then fill its job under its own lock. */
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            /* Found a free worker; job dispatched. */
            break;
        } else {
            /* Everyone busy: wait for any worker to signal completion. */
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
}
3560
/* Initialize the global ram_state used by the COLO incoming side. */
static void colo_init_ram_state(void)
{
    ram_state_init(&ram_state);
}
3565
3566
3567
3568
3569
3570
/*
 * colo_init_ram_cache: allocate the per-block COLO page cache and bitmaps
 *
 * Allocates an anonymous-RAM shadow ("colo_cache") for every migratable
 * RAMBlock plus a dirty bitmap per block, then initializes ram_state.
 *
 * Returns 0 on success; on allocation failure all caches allocated so far
 * are freed and a negative errno is returned.
 */
int colo_init_ram_cache(void)
{
    RAMBlock *block;

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                    NULL, false, false);
            if (!block->colo_cache) {
                error_report("%s: Can't alloc memory for COLO cache of block %s,"
                             "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
                             block->used_length);
                /* Unwind: free every cache allocated before the failure. */
                RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                    if (block->colo_cache) {
                        qemu_anon_ram_free(block->colo_cache, block->used_length);
                        block->colo_cache = NULL;
                    }
                }
                /*
                 * NOTE(review): assumes qemu_anon_ram_alloc left a
                 * meaningful errno on failure - confirm.
                 */
                return -errno;
            }
            if (!machine_dump_guest_core(current_machine)) {
                /* Keep the (large) cache out of core dumps. */
                qemu_madvise(block->colo_cache, block->used_length,
                             QEMU_MADV_DONTDUMP);
            }
        }
    }

    /*
     * Dirty bitmaps track which cached pages must be flushed back into
     * guest RAM at each checkpoint; sized by max_length in target pages.
     */
    if (ram_bytes_total()) {
        RAMBlock *block;

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
            block->bmap = bitmap_new(pages);
        }
    }

    colo_init_ram_state();
    return 0;
}
3615
3616
/*
 * colo_incoming_start_dirty_log: begin dirty tracking on the COLO secondary
 *
 * Syncs and then discards any dirty state accumulated so far, and turns on
 * global dirty logging so subsequent guest writes are tracked. Takes the
 * iothread lock then the ramlist lock (in that order) around the whole
 * operation.
 */
void colo_incoming_start_dirty_log(void)
{
    RAMBlock *block = NULL;

    /* Lock order: iothread first, then ramlist. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            /* Pull pending dirty info, then throw it away: start clean. */
            ramblock_sync_dirty_bitmap(ram_state, block);

            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
        }
        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
    }
    ram_state->migration_dirty_pages = 0;
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
3637
3638
/*
 * colo_release_ram_cache: tear down everything colo_init_ram_cache() built
 *
 * Stops dirty logging, frees each block's bitmap and COLO cache, and
 * releases the global ram_state.
 */
void colo_release_ram_cache(void)
{
    RAMBlock *block;

    memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->bmap);
        block->bmap = NULL;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            if (block->colo_cache) {
                qemu_anon_ram_free(block->colo_cache, block->used_length);
                block->colo_cache = NULL;
            }
        }
    }
    ram_state_cleanup(&ram_state);
}
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668static int ram_load_setup(QEMUFile *f, void *opaque)
3669{
3670 if (compress_threads_load_setup(f)) {
3671 return -1;
3672 }
3673
3674 xbzrle_load_setup();
3675 ramblock_recv_map_init();
3676
3677 return 0;
3678}
3679
/*
 * ram_load_cleanup: release incoming-side resources after loading RAM
 *
 * Writes back block contents (relevant for file-backed blocks), frees the
 * XBZRLE and decompression state, and drops each block's receive bitmap.
 * Always returns 0.
 */
static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        /* Flush loaded data to the block's backing store if it has one. */
        qemu_ram_block_writeback(rb);
    }

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }

    return 0;
}
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
/*
 * ram_postcopy_incoming_init: thin wrapper preparing postcopy reception.
 *
 * @mis: current migration incoming state
 *
 * Returns the result of postcopy_ram_incoming_init() (0 on success).
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    return postcopy_ram_incoming_init(mis);
}
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
/*
 * ram_load_postcopy: load the RAM stream while postcopy is running
 *
 * Unlike precopy, pages cannot be written into guest RAM directly (the
 * guest is already running and pages are demand-faulted), so target pages
 * are assembled in a temporary buffer until a whole host page is complete,
 * then atomically "placed" via userfaultfd helpers.
 *
 * @f: QEMUFile to read the stream from
 *
 * Returns 0 on a clean RAM_SAVE_FLAG_EOS, negative on error.
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matches_target_page_size = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary host page being assembled before it is "placed". */
    void *postcopy_host_page = mis->postcopy_tmp_page;
    void *host_page = NULL;
    bool all_zero = true;       /* whole host page zero so far? */
    int target_pages = 0;       /* target pages collected for this host page */

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;
        int len;

        addr = qemu_get_be64(f);

        /*
         * Check for a stream error before trusting addr: on error the
         * read may have returned garbage.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        /* Low bits of the address word carry the page flags. */
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(f, flags);
            if (!block) {
                ret = -EINVAL;
                break;
            }

            /*
             * Bound-check against postcopy_length: the block may have
             * shrunk/grown relative to the source (see the resize
             * notifier below), and only [0, postcopy_length) is valid.
             */
            if (!block->host || addr >= block->postcopy_length) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            target_pages++;
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;

            /*
             * Each target page lands at its own offset inside the
             * temporary host page; the host page is placed into guest
             * memory only once all of its target pages have arrived.
             */
            page_buffer = postcopy_host_page +
                          host_page_offset_from_ram_block_offset(block, addr);

            if (target_pages == 1) {
                /* First fragment fixes which host page we are filling. */
                host_page = host_page_from_ram_block_offset(block, addr);
            } else if (host_page != host_page_from_ram_block_offset(block,
                                                                    addr)) {
                /* Fragments of one host page must arrive consecutively. */
                error_report("Non-same host page %p/%p", host_page,
                             host_page_from_ram_block_offset(block, addr));
                ret = -EINVAL;
                break;
            }

            /*
             * Once every target page of the host page has been received,
             * it can be placed.
             */
            if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
                place_needed = true;
            }
            place_source = postcopy_host_page;
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            /*
             * When the host page is a single target page and stays all
             * zero, the memset can be skipped; postcopy_place_page_zero()
             * will handle it below.
             */
            if (ch || !matches_target_page_size) {
                memset(page_buffer, ch, TARGET_PAGE_SIZE);
            }
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!matches_target_page_size) {
                /* Building up a huge page: copy into the assembly buffer. */
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * Host page == target page: let the QEMUFile hand back a
                 * pointer into its own buffer when possible and place
                 * directly from there, avoiding a copy.
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            all_zero = false;
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, page_buffer, len);
            break;

        case RAM_SAVE_FLAG_EOS:
            /* Normal end of section. */
            multifd_recv_sync_main();
            break;
        default:
            error_report("Unknown combination of migration flags: 0x%x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

        /* All decompression must finish before the page can be placed. */
        if (place_needed) {
            ret |= wait_for_decompress_done();
        }

        /* Surface any stream error detected during this iteration. */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            if (all_zero) {
                ret = postcopy_place_page_zero(mis, host_page, block);
            } else {
                ret = postcopy_place_page(mis, host_page, place_source,
                                          block);
            }
            /* Reset per-host-page state for the next page. */
            place_needed = false;
            target_pages = 0;
            all_zero = true;
        }
    }

    return ret;
}
3896
3897static bool postcopy_is_advised(void)
3898{
3899 PostcopyState ps = postcopy_state_get();
3900 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3901}
3902
3903static bool postcopy_is_running(void)
3904{
3905 PostcopyState ps = postcopy_state_get();
3906 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3907}
3908
3909
3910
3911
3912
/*
 * colo_flush_ram_cache: copy dirty pages from the COLO cache into guest RAM
 *
 * Called at a COLO checkpoint: first merges the latest dirty-log state
 * into each block's bitmap, then walks all blocks copying every dirty run
 * from colo_cache back into the live guest memory, clearing bits as it
 * goes. Holds ram_state->bitmap_mutex for the whole flush.
 */
void colo_flush_ram_cache(void)
{
    RAMBlock *block = NULL;
    void *dst_host;
    void *src_host;
    unsigned long offset = 0;

    memory_global_dirty_log_sync();
    qemu_mutex_lock(&ram_state->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
        }
    }

    trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
    WITH_RCU_READ_LOCK_GUARD() {
        block = QLIST_FIRST_RCU(&ram_list.blocks);

        while (block) {
            unsigned long num = 0;

            /* Find the next dirty run (num pages) at or after offset. */
            offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
            if (!offset_in_ramblock(block,
                                    ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
                /* Past the end of this block: move on to the next one. */
                offset = 0;
                num = 0;
                block = QLIST_NEXT_RCU(block, next);
            } else {
                unsigned long i = 0;

                for (i = 0; i < num; i++) {
                    migration_bitmap_clear_dirty(ram_state, block, offset + i);
                }
                dst_host = block->host
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                src_host = block->colo_cache
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                /* Copy the whole dirty run in one memcpy. */
                memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
                offset += num;
            }
        }
    }
    trace_colo_flush_ram_cache_end();
    qemu_mutex_unlock(&ram_state->bitmap_mutex);
}
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
/*
 * ram_load_precopy: load the RAM stream during the precopy phase
 *
 * Parses (address|flags, payload) records until RAM_SAVE_FLAG_EOS or an
 * error, writing pages straight into guest RAM (or into the COLO cache
 * when COLO is active).
 *
 * @f: QEMUFile to read the stream from
 *
 * Returns 0 on success, negative on a malformed stream or I/O error.
 */
static int ram_load_precopy(QEMUFile *f)
{
    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;

    /* ADVISE came earlier if the source has postcopy capability enabled. */
    bool postcopy_advised = postcopy_is_advised();
    if (!migrate_use_compression()) {
        /* Reject compressed pages when compression was not negotiated. */
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL, *host_bak = NULL;
        uint8_t ch;

        /*
         * When running in a coroutine, yield back to the main loop every
         * 32768 pages so RAM loading can't monopolize the thread.
         */
        if ((i & 32767) == 0 && qemu_in_coroutine()) {
            aio_co_schedule(qemu_get_current_aio_context(),
                            qemu_coroutine_self());
            qemu_coroutine_yield();
        }
        i++;

        /* Low bits of the address word carry the page flags. */
        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            /*
             * COLO redirection:
             * - in COLO state proper, pages go into the cache only
             *   (host is replaced);
             * - before COLO state, pages go to guest RAM as usual but a
             *   backup copy into the cache is kept via host_bak (the
             *   memcpy at the bottom of the loop).
             */
            if (migration_incoming_colo_enabled()) {
                if (migration_incoming_in_colo_state()) {
                    /* In COLO stage: write into the cache, mark dirty. */
                    host = colo_cache_from_block_offset(block, addr, true);
                } else {
                    /* Pre-COLO stage: remember where to mirror the page. */
                    host_bak = colo_cache_from_block_offset(block, addr, false);
                }
            }
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (!migration_incoming_in_colo_state()) {
                ramblock_recv_bitmap_set(block, host);
            }

            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /*
             * Stream preamble: a list of (name, length[, page size][, GPA])
             * tuples describing every RAMBlock; addr holds the total size.
             */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated !", id);
                    ret = -EINVAL;
                } else if (block) {
                    if (length != block->used_length) {
                        /* Try to match the source's block size. */
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* With postcopy, host (huge)page sizes must match. */
                    if (postcopy_advised && migrate_postcopy_ram() &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    if (migrate_ignore_shared()) {
                        /* Ignored (shared) blocks must map at the same GPA. */
                        hwaddr addr = qemu_get_be64(f);
                        if (ramblock_is_ignored(block) &&
                            block->mr->addr != addr) {
                            error_report("Mismatched GPAs for block %s "
                                         "%" PRId64 "!= %" PRId64,
                                         id, (uint64_t)addr,
                                         (uint64_t)block->mr->addr);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            /* A page that is a single repeated byte (normally zero). */
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* Normal end of section. */
            multifd_recv_sync_main();
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: 0x%x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
        if (!ret && host_bak) {
            /* Mirror the just-loaded page into the COLO cache. */
            memcpy(host_bak, host, TARGET_PAGE_SIZE);
        }
    }

    /* Don't return until all queued decompressions have landed. */
    ret |= wait_for_decompress_done();
    return ret;
}
4162
4163static int ram_load(QEMUFile *f, void *opaque, int version_id)
4164{
4165 int ret = 0;
4166 static uint64_t seq_iter;
4167
4168
4169
4170
4171 bool postcopy_running = postcopy_is_running();
4172
4173 seq_iter++;
4174
4175 if (version_id != 4) {
4176 return -EINVAL;
4177 }
4178
4179
4180
4181
4182
4183
4184
4185 WITH_RCU_READ_LOCK_GUARD() {
4186 if (postcopy_running) {
4187 ret = ram_load_postcopy(f);
4188 } else {
4189 ret = ram_load_precopy(f);
4190 }
4191 }
4192 trace_ram_load_complete(ret, seq_iter);
4193
4194 return ret;
4195}
4196
4197static bool ram_has_postcopy(void *opaque)
4198{
4199 RAMBlock *rb;
4200 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
4201 if (ramblock_is_pmem(rb)) {
4202 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4203 "is not supported now!", rb->idstr, rb->host);
4204 return false;
4205 }
4206 }
4207
4208 return migrate_postcopy_ram();
4209}
4210
4211
4212static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4213{
4214 RAMBlock *block;
4215 QEMUFile *file = s->to_dst_file;
4216 int ramblock_count = 0;
4217
4218 trace_ram_dirty_bitmap_sync_start();
4219
4220 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4221 qemu_savevm_send_recv_bitmap(file, block->idstr);
4222 trace_ram_dirty_bitmap_request(block->idstr);
4223 ramblock_count++;
4224 }
4225
4226 trace_ram_dirty_bitmap_sync_wait();
4227
4228
4229 while (ramblock_count--) {
4230 qemu_sem_wait(&s->rp_state.rp_sem);
4231 }
4232
4233 trace_ram_dirty_bitmap_sync_complete();
4234
4235 return 0;
4236}
4237
/* Wake ram_dirty_bitmap_sync_all(), which waits once per reloaded bitmap. */
static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}
4242
4243
4244
4245
4246
4247
/*
 * ram_dirty_bitmap_reload: rebuild a block's dirty bitmap from the
 * destination's received-page bitmap (postcopy recovery path)
 *
 * Wire format on the return path: be64 size, little-endian bitmap bytes,
 * be64 end mark (RAMBLOCK_RECV_BITMAP_ENDING). The received bitmap marks
 * pages the destination already has, so it is inverted to become the set
 * of pages still to send.
 *
 * @s: current migration state (must be in POSTCOPY_RECOVER)
 * @block: the RAMBlock whose bitmap is being reloaded
 *
 * Returns 0 on success, negative on any validation or I/O failure.
 */
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
{
    int ret = -EINVAL;
    /* from_dst_file is protected by the POSTCOPY_RECOVER state check below */
    QEMUFile *file = s->rp_state.from_dst_file;
    unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
    uint64_t local_size = DIV_ROUND_UP(nbits, 8);
    uint64_t size, end_mark;

    trace_ram_dirty_bitmap_reload_begin(block->idstr);

    if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: incorrect state %s", __func__,
                     MigrationStatus_str(s->state));
        return -EINVAL;
    }

    /*
     * The sender transfers the bitmap padded to 8-byte (long) units;
     * match that here so the size comparison below is exact.
     */
    local_size = ROUND_UP(local_size, 8);

    /* One extra long of slack for the padding. */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    size = qemu_get_be64(file);

    /* The size of the bitmap must match ours exactly. */
    if (size != local_size) {
        error_report("%s: ramblock '%s' bitmap size mismatch "
                     "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
                     block->idstr, size, local_size);
        ret = -EINVAL;
        goto out;
    }

    size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
    end_mark = qemu_get_be64(file);

    ret = qemu_file_get_error(file);
    if (ret || size != local_size) {
        error_report("%s: read bitmap failed for ramblock '%s': %d"
                     " (size 0x%"PRIx64", got: 0x%"PRIx64")",
                     __func__, block->idstr, ret, local_size, size);
        ret = -EIO;
        goto out;
    }

    if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                     __func__, block->idstr, end_mark);
        ret = -EINVAL;
        goto out;
    }

    /* Convert from wire (little-endian) to host bitmap layout. */
    bitmap_from_le(block->bmap, le_bitmap, nbits);

    /*
     * What we received is "received pages"; invert it to get the dirty
     * (still-to-send) set.
     */
    bitmap_complement(block->bmap, block->bmap, nbits);

    /* Never resend pages that were discarded (e.g. virtio-mem holes). */
    ramblock_dirty_bitmap_clear_discarded_pages(block);

    trace_ram_dirty_bitmap_reload_complete(block->idstr);

    /* Let the waiting sync-all loop advance by one block. */
    ram_dirty_bitmap_reload_notify(s);

    ret = 0;
out:
    g_free(le_bitmap);
    return ret;
}
4333
4334static int ram_resume_prepare(MigrationState *s, void *opaque)
4335{
4336 RAMState *rs = *(RAMState **)opaque;
4337 int ret;
4338
4339 ret = ram_dirty_bitmap_sync_all(s, rs);
4340 if (ret) {
4341 return ret;
4342 }
4343
4344 ram_state_resume_prepare(rs, s->to_dst_file);
4345
4346 return 0;
4347}
4348
/* Dispatch table registering RAM migration with the savevm framework. */
static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    /* Completion uses the same entry for precopy and postcopy. */
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
    .resume_prepare = ram_resume_prepare,
};
4362
/*
 * ram_mig_ram_block_resized: RAMBlock resize notifier for migration
 *
 * Resizes during an active precopy migration abort it (the source's block
 * sizes would no longer match). On the incoming side, how a resize is
 * handled depends on the postcopy state at the time it happens.
 *
 * @n: the notifier (unused)
 * @host: host address of the resized block
 * @old_size / @new_size: sizes before and after the resize
 */
static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
                                      size_t old_size, size_t new_size)
{
    PostcopyState ps = postcopy_state_get();
    ram_addr_t offset;
    RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
    Error *err = NULL;

    if (ramblock_is_ignored(rb)) {
        /* Ignored blocks are never migrated; nothing to do. */
        return;
    }

    if (!migration_is_idle()) {
        /*
         * A resize while migration is in flight invalidates the stream;
         * cancel the migration rather than send inconsistent data.
         */
        error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
        migration_cancel(err);
        error_free(err);
    }

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
        /*
         * Resize happened before the postcopy listen phase: discard any
         * newly-added range so it demand-faults cleanly later, and record
         * the new valid length for the postcopy loader's bound check.
         */
        if (old_size < new_size) {
            if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
                error_report("RAM block '%s' discard of resized RAM failed",
                             rb->idstr);
            }
        }
        rb->postcopy_length = new_size;
        break;
    case POSTCOPY_INCOMING_NONE:
    case POSTCOPY_INCOMING_RUNNING:
    case POSTCOPY_INCOMING_END:
        /* In these states a resize needs no special postcopy handling. */
        break;
    default:
        /*
         * A resize during DISCARD/LISTENING cannot be handled safely:
         * userfaultfd registrations would be stale. Hard exit.
         */
        error_report("RAM block '%s' resized during postcopy state: %d",
                     rb->idstr, ps);
        exit(-1);
    }
}
4417
/* Notifier hooking migration into RAMBlock resize events. */
static RAMBlockNotifier ram_mig_ram_notifier = {
    .ram_block_resized = ram_mig_ram_block_resized,
};
4421
/*
 * ram_mig_init: one-time registration of RAM migration
 *
 * Initializes the XBZRLE cache lock, registers the "ram" live section
 * (version 4) with savevm, and subscribes to RAMBlock resize events.
 */
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
    ram_block_notifier_add(&ram_mig_ram_notifier);
}
4428