/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/madvise.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/cpu-throttle.h"
#include "savevm.h"
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"

#include "hw/boards.h"

#if defined(__linux__)
#include "qemu/userfaultfd.h"
#endif

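/***********************************************************/
/* ram save/restore */

/*
 * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it was
 * used for pages that were filled with the same byte and now only marks
 * all-zero pages.  RAM_SAVE_FLAG_FULL is obsolete and no longer emitted.
 */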
#define RAM_SAVE_FLAG_FULL     0x01
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h, so start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

XBZRLECacheStats xbzrle_counters;

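/* struct contains XBZRLE cache and a static page
   used by the compression */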
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

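/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from migrate_params_apply in the main thread,
 * possibly while a migration is in progress.  A running migration may
 * be using the cache and might finish during this call, hence changes
 * to the cache are protected by XBZRLE.lock.
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */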
int xbzrle_cache_resize(uint64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

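/*
 * Send the recv bitmap of @block_name to the source, converted to
 * little endian and padded to a multiple of 8 bytes, followed by a
 * sanity-check end marker.
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */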
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need some extra bytes when the bitmap is sent
     * 64bit-aligned.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap, so that the
     * wire format does not depend on the host byte order.
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

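/*
 * An outstanding page request from the destination (postcopy),
 * queued on the source.
 */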
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

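/* State of RAM for migration */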
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* UFFD file descriptor, used in 'write-tracking' migration */
    int uffdio_fd;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;
    /* Start using XBZRLE (e.g., after the first round). */
    bool xbzrle_enabled;
    /* Are we on the last stage of migration */
    bool last_stage;
    /* compression statistics since the beginning of the period */
    /* amount of count that no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

/* Whether postcopy has queued requests? */
static bool postcopy_has_request(RAMState *rs)
{
    return !QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests);
}

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

static void ram_transferred_add(uint64_t bytes)
{
    if (runstate_is_running()) {
        ram_counters.precopy_bytes += bytes;
    } else if (migration_in_postcopy()) {
        ram_counters.postcopy_bytes += bytes;
    } else {
        ram_counters.downtime_bytes += bytes;
    }
    ram_counters.transferred += bytes;
}

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

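/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */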
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

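/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */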
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /* Compute the ideal CPU percentage used by the guest, which
             * may make the dirty rate match the dirty rate threshold. */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                        bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}

void mig_throttle_counter_reset(void)
{
    RAMState *rs = ram_state;

    rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    rs->num_dirty_pages_period = 0;
    rs->bytes_xfer_prev = ram_counters.transferred;
}

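/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */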
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (!rs->xbzrle_enabled) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

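/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */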
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!rs->last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!rs->last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_transferred_add(bytes_xbzrle);

    return 1;
}

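/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Returns the page offset within memory region of the start of a dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */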
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    return find_next_bit(bitmap, size, start);
}

static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... anyway,
     * can make things easier sometimes by optimizing the below codes.
     */
    assert(shift >= 6);

    size = 1ULL << (TARGET_PAGE_BITS + shift);
    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}

static void
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
    unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);

    /*
     * Clear pages from start to start + npages - 1, so the end boundary is
     * exclusive.
     */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rb, i);
    }
}

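/**
 * colo_bitmap_find_dirty: find next dirty pages used for COLO
 *
 * Returns the page offset within memory region of the start of the dirty
 * pages
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 * @num: the number of contiguous dirty pages found
 */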
static inline
unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                     unsigned long start, unsigned long *num)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long first, next;

    *num = 0;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    first = find_next_bit(bitmap, size, start);
    if (first >= size) {
        return first;
    }
    next = find_next_zero_bit(bitmap, size, first + 1);
    assert(next >= first);
    *num = next - first;
    return first;
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    /*
     * Clear dirty bitmap if needed.  This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time.  So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    migration_clear_memory_region_dirty_bitmap(rb, page);

    ret = test_and_clear_bit(page, rb->bmap);
    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}

static void dirty_bitmap_clear_section(MemoryRegionSection *section,
                                       void *opaque)
{
    const hwaddr offset = section->offset_within_region;
    const hwaddr size = int128_get64(section->size);
    const unsigned long start = offset >> TARGET_PAGE_BITS;
    const unsigned long npages = size >> TARGET_PAGE_BITS;
    RAMBlock *rb = section->mr->ram_block;
    uint64_t *cleared_bits = opaque;

    /*
     * We don't grab ram_state->bitmap_mutex because we expect to run
     * only when starting migration or during postcopy recovery where
     * we don't have concurrent access.
     */
    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
    }
    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
    bitmap_clear(rb->bmap, start, npages);
}

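/*
 * Exclude all dirty pages from migration that fall into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region of
 * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
 *
 * Discarded pages ("logically unplugged") have undefined content and must
 * not get migrated, because even reading these pages for migration might
 * result in undesired behavior.
 *
 * Returns the number of cleared bits.
 */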
static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
{
    uint64_t cleared_bits = 0;

    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = int128_make64(qemu_ram_get_used_length(rb)),
        };

        ram_discard_manager_replay_discarded(rdm, &section,
                                             dirty_bitmap_clear_section,
                                             &cleared_bits);
    }
    return cleared_bits;
}

/*
 * Check if a host-page aligned page falls into a discarded range as managed by
 * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
{
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = start,
            .size = int128_make64(qemu_ram_pagesize(rb)),
        };

        return !ram_discard_manager_is_populated(rdm, &section);
    }
    return false;
}

/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}

uint64_t ram_get_total_transferred_pages(void)
{
    return ram_counters.normal + ram_counters.duplicate +
                compression_counters.pages + xbzrle_counters.pages;
}

static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}

static void migration_trigger_throttle(RAMState *rs)
{
    MigrationState *s = migrate_get_current();
    uint64_t threshold = s->parameters.throttle_trigger_threshold;

    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;

    /* During block migration the auto-converge logic incorrectly detects
     * that ram migration makes no progress. Avoid this by disabling the
     * throttling logic during the bulk phase of block migration. */
    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
        /* The following detection logic can be refined later. For now:
           Check to see if the ratio between dirtied bytes and the approx.
           amount of bytes that just got transferred since the last time
           we were in this routine reaches the threshold. If that happens
           twice, start or increase throttling. */

        if ((bytes_dirty_period > bytes_dirty_threshold) &&
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}

static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration, so we
     * don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}

static void ram_release_page(const char *rbname, uint64_t offset)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
}

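/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is not
 * a zero page
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */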
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(file, 0);
        len += 1;
        ram_release_page(block->idstr, offset);
    }
    return len;
}

/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    int len = save_zero_page_to_file(rs, rs->f, block, offset);

    if (len) {
        ram_counters.duplicate++;
        ram_transferred_add(len);
        return 1;
    }
    return -1;
}

/*
 * @pages: the number of pages written by the control path,
 *        < 0 - error
 *        > 0 - number of pages written
 *
 * Return true if the page has been saved, otherwise false is returned.
 */
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_transferred_add(bytes_xmit);
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}

/*
 * directly send the page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @buf: the page to be sent
 * @async: send the page asynchronously
 */
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                            uint8_t *buf, bool async)
{
    ram_transferred_add(save_page_header(rs, rs->f, block,
                                         offset | RAM_SAVE_FLAG_PAGE));
    if (async) {
        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
                              migrate_release_ram() &
                              migration_in_postcopy());
    } else {
        qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
    }
    ram_transferred_add(TARGET_PAGE_SIZE);
    ram_counters.normal++;
    return 1;
}

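/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 */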
static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (rs->xbzrle_enabled && !migration_in_postcopy()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset);
        if (!rs->last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                 ram_addr_t offset)
{
    if (multifd_queue_page(rs->f, block, offset) < 0) {
        return -1;
    }
    ram_counters.normal++;

    return 1;
}

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + offset;
    int ret;

    if (save_zero_page_to_file(rs, f, block, offset)) {
        return true;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * copy it to an internal buffer to avoid it being modified by VM
     * so that we can catch up the error during compression and
     * decompression
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
    }
    return false;
}

static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_transferred_add(bytes_xmit);

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}

static bool save_page_use_compression(RAMState *rs);

static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * it's safe to fetch zero_page without holding comp_done_lock
             * as there is no further request submitted to the thread,
             * i.e, the thread should be waiting for a request at this point.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * wait for the free thread if the user specifies 'compress-wait-thread',
     * otherwise we will post the page out in the main thread as normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

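/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns true if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */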
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (!offset_in_ramblock(pss->block,
                            ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /*
             * If memory migration starts over, we will meet a dirtied page
             * which may still exist in the compression threads' ring, so we
             * should flush the compressed data to make sure the new page
             * is not overwritten by the old one in the destination.
             *
             * Also, if xbzrle is on, stop using the data compression at this
             * point. In theory, xbzrle can do better than compression.
             */
            flush_compressed_data(rs);

            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            /* After the first round, enable XBZRLE. */
            if (migrate_use_xbzrle()) {
                rs->xbzrle_enabled = true;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /*
         * We've found something so probably don't need to search
         * again later.
         */
        return true;
    }
}

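/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */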
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    struct RAMSrcPageRequest *entry;
    RAMBlock *block = NULL;
    size_t page_size;

    if (!postcopy_has_request(rs)) {
        return NULL;
    }

    QEMU_LOCK_GUARD(&rs->src_page_req_mutex);

    /*
     * This should _never_ change even after we take the lock, because no one
     * should be taking anything off the request list other than us.
     */
    assert(postcopy_has_request(rs));

    entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
    block = entry->rb;
    *offset = entry->offset;
    page_size = qemu_ram_pagesize(block);
    /* Each page request should only be a multiple of the ramblock page size */
    assert((entry->len % page_size) == 0);

    if (entry->len > page_size) {
        entry->len -= page_size;
        entry->offset += page_size;
    } else {
        memory_region_unref(block->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(entry);
        migration_consume_urgent_request();
    }

    trace_unqueue_page(block->idstr, *offset,
                       test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));

    return block;
}

#if defined(__linux__)
/**
 * poll_fault_page: try to get next UFFD write fault page and, if pending fault
 *   is found, return RAM block pointer and page offset
 *
 * Returns pointer to the RAMBlock containing faulting page,
 *   NULL if no write faults are pending
 *
 * @rs: current RAM state
 * @offset: page offset from the beginning of the block
 */
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    struct uffd_msg uffd_msg;
    void *page_address;
    RAMBlock *block;
    int res;

    if (!migrate_background_snapshot()) {
        return NULL;
    }

    res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
    if (res <= 0) {
        return NULL;
    }

    page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
    block = qemu_ram_block_from_host(page_address, false, offset);
    assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
    return block;
}

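/**
 * ram_save_release_protection: release UFFD write protection after
 *   a range of pages has been saved
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @rs: current RAM state
 * @pss: page-search-status structure
 * @start_page: index of the first page in the range relative to pss->block
 */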
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
        unsigned long start_page)
{
    int res = 0;

    /* Check if page is from UFFD-managed region. */
    if (pss->block->flags & RAM_UF_WRITEPROTECT) {
        void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
        uint64_t run_length = (pss->page - start_page) << TARGET_PAGE_BITS;

        /* Flush async buffers before un-protect. */
        qemu_fflush(rs->f);
        /* Un-protect memory range. */
        res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
                false, false);
    }

    return res;
}

/*
 * ram_write_tracking_available: check if kernel supports required UFFD features
 *
 * Returns true if supports, false otherwise
 */
bool ram_write_tracking_available(void)
{
    uint64_t uffd_features;
    int res;

    res = uffd_query_features(&uffd_features);
    return (res == 0 &&
            (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
}

/*
 * ram_write_tracking_compatible: check if guest configuration is
 *   compatible with 'write-tracking' migration feature
 *
 * Returns true if compatible, false otherwise
 */
bool ram_write_tracking_compatible(void)
{
    const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
    int uffd_fd;
    RAMBlock *block;
    bool ret = false;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
    if (uffd_fd < 0) {
        return false;
    }

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        uint64_t uffd_ioctls;

        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }
        /* Try to register block memory via UFFD-IO to track writes */
        if (uffd_register_memory(uffd_fd, block->host, block->max_length,
                UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
            goto out;
        }
        if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
            goto out;
        }
    }
    ret = true;

out:
    uffd_close_fd(uffd_fd);
    return ret;
}

static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
                                       ram_addr_t size)
{
    const ram_addr_t end = offset + size;

    /*
     * We read one byte of each page; this will preallocate page tables if
     * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
     * where no page was populated yet. This might require adaption when
     * supporting other mappings, like shmem.
     */
    for (; offset < end; offset += block->page_size) {
        char tmp = *((char *)block->host + offset);

        /* Don't optimize the read out */
        asm volatile("" : "+r" (tmp));
    }
}

static inline int populate_read_section(MemoryRegionSection *section,
                                        void *opaque)
{
    const hwaddr size = int128_get64(section->size);
    hwaddr offset = section->offset_within_region;
    RAMBlock *block = section->mr->ram_block;

    populate_read_range(block, offset, size);
    return 0;
}

/*
 * ram_block_populate_read: preallocate page tables and populate pages in the
 *   RAM block by reading a byte of each page.
 *
 * Since it's solely used for userfault_fd WP feature, here we just
 *   hardcode page size to the RAMBlock page size.
 *
 * @rb: RAM block to populate
 */
static void ram_block_populate_read(RAMBlock *rb)
{
    /*
     * Skip populating all pages that fall into a discarded range as managed by
     * a RamDiscardManager responsible for the mapped memory region of the
     * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
     * must not get populated automatically. We don't have to track
     * modifications via userfaultfd WP reliably, because these pages will
     * not be part of the migration stream either way -- see
     * ramblock_dirty_bitmap_clear_discarded_pages().
     *
     * Note: The result is only stable while migrating (precopy/postcopy).
     */
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = rb->mr->size,
        };

        ram_discard_manager_replay_populated(rdm, &section,
                                             populate_read_section, NULL);
    } else {
        populate_read_range(rb, 0, rb->used_length);
    }
}

/*
 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
 */
void ram_write_tracking_prepare(void)
{
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /*
         * Populate pages of the RAM block before enabling userfault_fd
         * write protection.
         *
         * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
         * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
         * pages with pte_none() entries in page table.
         */
        ram_block_populate_read(block);
    }
}

/*
 * ram_write_tracking_start: start UFFD-WP memory tracking
 *
 * Returns 0 for success or negative value in case of error
 */
int ram_write_tracking_start(void)
{
    int uffd_fd;
    RAMState *rs = ram_state;
    RAMBlock *block;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
    if (uffd_fd < 0) {
        return uffd_fd;
    }
    rs->uffdio_fd = uffd_fd;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /* Register block memory with UFFD to track writes */
        if (uffd_register_memory(rs->uffdio_fd, block->host,
                block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
            goto fail;
        }
        /* Apply UFFD write protection to the block memory range */
        if (uffd_change_protection(rs->uffdio_fd, block->host,
                block->max_length, true, false)) {
            goto fail;
        }
        block->flags |= RAM_UF_WRITEPROTECT;
        memory_region_ref(block->mr);

        trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
                block->host, block->max_length);
    }

    return 0;

fail:
    error_report("ram_write_tracking_start() failed: restoring initial memory state");

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /*
         * In case some memory block failed to be write-protected
         * remove protection and unregister all succeeded RAM blocks
         */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
        /* Cleanup flags and remove reference */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    uffd_close_fd(uffd_fd);
    rs->uffdio_fd = -1;
    return -1;
}

/*
 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
 */
void ram_write_tracking_stop(void)
{
    RAMState *rs = ram_state;
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /* Remove protection and unregister all affected RAM blocks */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);

        trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
                block->host, block->max_length);

        /* Cleanup flags and remove reference */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    /* Finally close UFFD file descriptor */
    uffd_close_fd(rs->uffdio_fd);
    rs->uffdio_fd = -1;
}

#else
/* No target OS support, stubs just fail or ignore */

static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    (void) rs;
    (void) offset;

    return NULL;
}

static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
        unsigned long start_page)
{
    (void) rs;
    (void) pss;
    (void) start_page;

    return 0;
}

bool ram_write_tracking_available(void)
{
    return false;
}

bool ram_write_tracking_compatible(void)
{
    assert(0);
    return false;
}

int ram_write_tracking_start(void)
{
    assert(0);
    return -1;
}

void ram_write_tracking_stop(void)
{
    assert(0);
}
#endif /* defined(__linux__) */

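/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns true if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */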
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;

    block = unqueue_page(rs, &offset);

    if (!block) {
        /*
         * Poll write faults too if background snapshot is enabled; that's
         * when we have vcpus got blocked by the write protected pages.
         */
        block = poll_fault_page(rs, &offset);
    }

    if (block) {
        /*
         * We want the background search to continue from the queued page,
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;

        /*
         * This unqueued page would break the "one round" check, even if
         * it's really rare.
         */
        pss->complete_round = false;
    }

    return !!block;
}

/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  in case that there is any page left, we drop it.
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /*
     * This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    RCU_READ_LOCK_GUARD();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
}

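/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @rbname: Name of the RAMBLock of the request. NULL means the
 *          same that last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */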
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    RCU_READ_LOCK_GUARD();

    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            return -1;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            return -1;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (!offset_in_ramblock(ramblock, start + len - 1)) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        return -1;
    }

    struct RAMSrcPageRequest *new_entry =
        g_new0(struct RAMSrcPageRequest, 1);
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    migration_make_urgent_request();
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;
}

static bool save_page_use_compression(RAMState *rs)
{
    if (!migrate_use_compression()) {
        return false;
    }

    /*
     * If xbzrle is enabled (e.g., after first round of migration), stop
     * using the data compression. In theory, xbzrle can do better than
     * compression.
     */
    if (rs->xbzrle_enabled) {
        return false;
    }

    return true;
}

/*
 * try to compress the page before posting it out, return true if the page
 * has been properly handled by compression, otherwise needs other
 * paths to handle it
 */
static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    if (!save_page_use_compression(rs)) {
        return false;
    }

    /*
     * When starting the process of a new block, the first page of
     * the block should be sent out before other pages in the same
     * block, and all the pages in last block should have been sent
     * out, keeping this order is important, because the 'cont' flag
     * is used to avoid resending the block name.
     *
     * We post the first page as normal page as compression will take
     * much CPU resource.
     */
    if (block != rs->last_sent_block) {
        flush_compressed_data(rs);
        return false;
    }

    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
        return true;
    }

    compression_counters.busy++;
    return false;
}

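/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 */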
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    int res;

    if (control_save_page(rs, block, offset, &res)) {
        return res;
    }

    if (save_compress_page(rs, block, offset)) {
        return 1;
    }

    res = save_zero_page(rs, block, offset);
    if (res > 0) {
        /*
         * Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale.
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        }
        return res;
    }

    /*
     * Do not use multifd for:
     * 1. Compression as the first page in the new block should be posted out
     *    before sending the compressed page
     * 2. In postcopy as one whole host page should be placed
     */
    if (!save_page_use_compression(rs) && migrate_use_multifd()
        && !migration_in_postcopy()) {
        return ram_save_multifd_page(rs, block, offset);
    }

    return ram_save_page(rs, pss);
}

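/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at *offset send pages up to the end of the current host
 * page. It's valid for the initial offset to point into the middle of
 * a host page in which case the remainder of the hostpage is sent.
 * Only dirty target pages are sent. Note that the host page size may
 * be a huge page for this block.
 * The saving stops at the boundary of the used_length of the block
 * if the RAMBlock isn't a multiple of the host page size.
 *
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 */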
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
{
    int tmppages, pages = 0;
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
    unsigned long hostpage_boundary =
        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
    unsigned long start_page = pss->page;
    int res;

    if (ramblock_is_ignored(pss->block)) {
        error_report("block %s should not be migrated!", pss->block->idstr);
        return 0;
    }

    do {
        /* Check if the page is dirty and send it if it is */
        if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
            tmppages = ram_save_target_page(rs, pss);
            if (tmppages < 0) {
                return tmppages;
            }

            pages += tmppages;
            /*
             * Allow rate limiting to happen in the middle of huge pages if
             * something is sent in the current iteration.
             */
            if (pagesize_bits > 1 && tmppages > 0) {
                migration_rate_limit();
            }
        }
        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    } while ((pss->page < hostpage_boundary) &&
             offset_in_ramblock(pss->block,
                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
    /* The offset we leave with is the min boundary of host page and block */
    pss->page = MIN(pss->page, hostpage_boundary);

    res = ram_save_release_protection(rs, pss, start_page);
    return (res < 0 ? res : pages);
}

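/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns the number of pages written where zero means no dirty pages,
 * or negative on error
 *
 * @rs: current RAM state
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */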
static int ram_find_and_save_block(RAMState *rs)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {
        return pages;
    }

    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(rs, &pss);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(rs, &pss, &again);
        }

        if (found) {
            pages = ram_save_host_page(rs, &pss);
        }
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;

    return pages;
}

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;

    if (zero) {
        ram_counters.duplicate += pages;
    } else {
        ram_counters.normal += pages;
        ram_transferred_add(size);
        qemu_update_position(f, size);
    }
}

static uint64_t ram_bytes_total_common(bool count_ignored)
{
    RAMBlock *block;
    uint64_t total = 0;

    RCU_READ_LOCK_GUARD();

    if (count_ignored) {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            total += block->used_length;
        }
    } else {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            total += block->used_length;
        }
    }
    return total;
}

uint64_t ram_bytes_total(void)
{
    return ram_bytes_total_common(false);
}

static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}

static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}

static void ram_state_cleanup(RAMState **rsp)
{
    if (*rsp) {
        migration_page_queue_free(*rsp);
        qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
        qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
        g_free(*rsp);
        *rsp = NULL;
    }
}

static void xbzrle_cleanup(void)
{
    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.zero_target_page);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
        XBZRLE.zero_target_page = NULL;
    }
    XBZRLE_cache_unlock();
}

static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* We don't use dirty log with background snapshots */
    if (!migrate_background_snapshot()) {
        /* caller have hold iothread lock or is in a bh, so there is
         * no writing race against the migration bitmap
         */
        if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
            /*
             * do not stop dirty log without starting it, since
             * memory_global_dirty_log_stop will assert that
             * memory_global_dirty_log_start/stop used in pairs
             */
            memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
        }
    }

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->clear_bmap);
        block->clear_bmap = NULL;
        g_free(block->bmap);
        block->bmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}

static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    rs->xbzrle_enabled = false;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

/*
 * Release the pages on the source that have already been migrated to
 * the destination (i.e. are no longer dirty) while in postcopy.
 */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr,
                              ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
                              ((ram_addr_t)(run_end - run_start))
                                << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}

/**
 * postcopy_send_discard_bm_ram: discard a RAMBlock
 *
 * Callback from postcopy_each_ram_send_discard for each RAMBlock
 *
 * @ms: current migration state
 * @block: RAMBlock to discard
 */
static void postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
{
    unsigned long end = block->used_length >> TARGET_PAGE_BITS;
    unsigned long current;
    unsigned long *bitmap = block->bmap;

    for (current = 0; current < end; ) {
        unsigned long one = find_next_bit(bitmap, end, current);
        unsigned long zero, discard_length;

        if (one >= end) {
            break;
        }

        zero = find_next_zero_bit(bitmap, end, one + 1);

        if (zero >= end) {
            discard_length = end - one;
        } else {
            discard_length = zero - one;
        }
        postcopy_discard_send_range(ms, one, discard_length);
        current = one + discard_length;
    }
}

static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block);

/**
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 *
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 *
 * @ms: current migration state
 */
static void postcopy_each_ram_send_discard(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        postcopy_discard_send_init(ms, block->idstr);

        /*
         * Deal with TPS != HPS and huge pages.  It discards any partially sent
         * host-page size chunks and marks any partially dirty host-page size
         * chunks as all dirty.  In this case the host-page is the host-page
         * for the particular RAMBlock, i.e. it might be a huge page.
         */
        postcopy_chunk_hostpages_pass(ms, block);

        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */
        postcopy_send_discard_bm_ram(ms, block);
        postcopy_discard_send_finish(ms);
    }
}

/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Helper for postcopy_each_ram_send_discard for the hostpage-size chunking
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix.  This function canonicalizes the bitmaps.
 *
 * @ms: current migration state
 * @block: block that contains the page we want to canonicalize
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    /* Find a dirty page */
    run_start = find_next_bit(bitmap, pages, 0);

    while (run_start < pages) {

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
            /* Find the end of this run */
            run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
        }

        if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
            unsigned long page;
            unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
                                                             host_ratio);
            run_start = QEMU_ALIGN_UP(run_start, host_ratio);

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        /* Find the next dirty page for the next iteration */
        run_start = find_next_bit(bitmap, pages, run_start);
    }
}

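/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Transmit the set of pages to be discarded after precopy to the target
 * these are pages that:
 *     a) Have been previously transmitted but are now dirty again
 *     b) Pages that have never been transmitted, this ensures that
 *        any pages on the destination that have been mapped by background
 *        tasks get discarded (transparent huge pages is the specific concern)
 * Hopefully this is pretty sparse
 *
 * @ms: current migration state
 */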
void ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    RAMState *rs = ram_state;

    RCU_READ_LOCK_GUARD();

    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;

    postcopy_each_ram_send_discard(ms);

    trace_ram_postcopy_send_discard_bitmap();
}

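/**
 * ram_discard_range: discard dirtied pages at the beginning of postcopy
 *
 * Returns zero on success
 *
 * @rbname: name of the RAMBlock of the request. NULL means the
 *          same that last one.
 * @start: RAMBlock starting page
 * @length: RAMBlock size
 */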
2609int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2610{
2611 trace_ram_discard_range(rbname, start, length);
2612
2613 RCU_READ_LOCK_GUARD();
2614 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2615
2616 if (!rb) {
2617 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2618 return -1;
2619 }
2620
2621
2622
2623
2624
2625 if (rb->receivedmap) {
2626 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2627 length >> qemu_target_page_bits());
2628 }
2629
2630 return ram_block_discard_range(rb, start, length);
2631}
2632
2633
2634
2635
2636
2637static int xbzrle_init(void)
2638{
2639 Error *local_err = NULL;
2640
2641 if (!migrate_use_xbzrle()) {
2642 return 0;
2643 }
2644
2645 XBZRLE_cache_lock();
2646
2647 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2648 if (!XBZRLE.zero_target_page) {
2649 error_report("%s: Error allocating zero page", __func__);
2650 goto err_out;
2651 }
2652
2653 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2654 TARGET_PAGE_SIZE, &local_err);
2655 if (!XBZRLE.cache) {
2656 error_report_err(local_err);
2657 goto free_zero_page;
2658 }
2659
2660 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2661 if (!XBZRLE.encoded_buf) {
2662 error_report("%s: Error allocating encoded_buf", __func__);
2663 goto free_cache;
2664 }
2665
2666 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2667 if (!XBZRLE.current_buf) {
2668 error_report("%s: Error allocating current_buf", __func__);
2669 goto free_encoded_buf;
2670 }
2671
2672
2673 XBZRLE_cache_unlock();
2674 return 0;
2675
2676free_encoded_buf:
2677 g_free(XBZRLE.encoded_buf);
2678 XBZRLE.encoded_buf = NULL;
2679free_cache:
2680 cache_fini(XBZRLE.cache);
2681 XBZRLE.cache = NULL;
2682free_zero_page:
2683 g_free(XBZRLE.zero_target_page);
2684 XBZRLE.zero_target_page = NULL;
2685err_out:
2686 XBZRLE_cache_unlock();
2687 return -ENOMEM;
2688}
2689
2690static int ram_state_init(RAMState **rsp)
2691{
2692 *rsp = g_try_new0(RAMState, 1);
2693
2694 if (!*rsp) {
2695 error_report("%s: Init ramstate fail", __func__);
2696 return -1;
2697 }
2698
2699 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2700 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2701 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2702
2703
2704
2705
2706
2707
2708 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2709 ram_state_reset(*rsp);
2710
2711 return 0;
2712}
2713
2714static void ram_list_init_bitmaps(void)
2715{
2716 MigrationState *ms = migrate_get_current();
2717 RAMBlock *block;
2718 unsigned long pages;
2719 uint8_t shift;
2720
2721
2722 if (ram_bytes_total()) {
2723 shift = ms->clear_bitmap_shift;
2724 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2725 error_report("clear_bitmap_shift (%u) too big, using "
2726 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2727 shift = CLEAR_BITMAP_SHIFT_MAX;
2728 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2729 error_report("clear_bitmap_shift (%u) too small, using "
2730 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2731 shift = CLEAR_BITMAP_SHIFT_MIN;
2732 }
2733
2734 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2735 pages = block->max_length >> TARGET_PAGE_BITS;
2736
2737
2738
2739
2740
2741
2742
2743
2744
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
            block->clear_bmap_shift = shift;
            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
        }
    }
}

static void migration_bitmap_clear_discarded_pages(RAMState *rs)
{
    unsigned long pages;
    RAMBlock *rb;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
        rs->migration_dirty_pages -= pages;
    }
}

static void ram_init_bitmaps(RAMState *rs)
{
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    WITH_RCU_READ_LOCK_GUARD() {
        ram_list_init_bitmaps();

        if (!migrate_background_snapshot()) {
            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
            migration_bitmap_sync_precopy(rs);
        }
    }
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

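    /*
     * After the first bitmap sync, exclude pages that have been discarded
     * (e.g. by virtio-mem) from the all-ones initial bitmap.
     */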
    migration_bitmap_clear_discarded_pages(rs);
}

static int ram_init_all(RAMState **rsp)
{
    if (ram_state_init(rsp)) {
        return -1;
    }

    if (xbzrle_init()) {
        ram_state_cleanup(rsp);
        return -1;
    }

    ram_init_bitmaps(*rsp);

    return 0;
}

static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
{
    RAMBlock *block;
    uint64_t pages = 0;

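    /*
     * Postcopy resume: recalculate the pending pages from the per-block
     * bitmaps, which at this point reflect the pages still to be sent.
     */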
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        pages += bitmap_count_one(block->bmap,
                                  block->used_length >> TARGET_PAGE_BITS);
    }

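    /* This may not be aligned with current bitmaps. Recalculate. */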
    rs->migration_dirty_pages = pages;

    ram_state_reset(rs);

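    /* Update RAMState cache of output QEMUFile */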
    rs->f = out;

    trace_ram_state_resume_prepare(pages);
}

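/*
 * qemu_guest_free_page_hint: clear bits of guest free pages from the
 * migration dirty bitmap so those pages are not transferred.
 *
 * @addr: host address of the start of the contiguous free range
 * @len: total bytes of those free pages
 */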
void qemu_guest_free_page_hint(void *addr, size_t len)
{
    RAMBlock *block;
    ram_addr_t offset;
    size_t used_len, start, npages;
    MigrationState *s = migrate_get_current();

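    /* This function is currently expected to be called during migration */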
    if (!migration_is_setup_or_active(s->state)) {
        return;
    }

    for (; len > 0; len -= used_len, addr += used_len) {
        block = qemu_ram_block_from_host(addr, false, &offset);
        if (unlikely(!block || offset >= block->used_length)) {
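            /*
             * The caller's hint should never point outside of a RAM block;
             * report once and bail out rather than trusting it.
             */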
            error_report_once("%s unexpected error", __func__);
            return;
        }

        if (len <= block->used_length - offset) {
            used_len = len;
        } else {
            used_len = block->used_length - offset;
        }

        start = offset >> TARGET_PAGE_BITS;
        npages = used_len >> TARGET_PAGE_BITS;

        qemu_mutex_lock(&ram_state->bitmap_mutex);
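        /*
         * The skipped free pages are equivalent to having been sent from the
         * clear_bmap's point of view, so clear the bits from the memory
         * region bitmap which are initially set. Otherwise those skipped
         * pages will be sent in the next round after syncing from the
         * memory region bitmap.
         */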
        migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
        ram_state->migration_dirty_pages -=
            bitmap_count_one_with_offset(block->bmap, start, npages);
        bitmap_clear(block->bmap, start, npages);
        qemu_mutex_unlock(&ram_state->bitmap_mutex);
    }
}

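/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */

/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */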
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    if (compress_threads_save_setup()) {
        return -1;
    }

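    /* In COLO state, migration has already set up the bitmap; reuse it. */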
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    WITH_RCU_READ_LOCK_GUARD() {
        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);

        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            qemu_put_byte(f, strlen(block->idstr));
            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
            qemu_put_be64(f, block->used_length);
            if (migrate_postcopy_ram() &&
                block->page_size != qemu_host_page_size) {
                qemu_put_be64(f, block->page_size);
            }
            if (migrate_ignore_shared()) {
                qemu_put_be64(f, block->mr->addr);
            }
        }
    }

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    multifd_send_sync_main(f);
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);

    return 0;
}

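/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */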
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
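        /*
         * Avoid transferring RAM during the bulk phase of block migration:
         * the bulk phase usually takes a long time and transferring RAM
         * updates during that time is pointless.
         */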
        goto out;
    }

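    /*
     * We'll hold this lock for a while, but that's okay: the only other
     * thread that takes it is qemu_guest_free_page_hint(), which should be
     * rare, and the MAX_WAIT check below guarantees we release it within a
     * regular quantum.
     */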
    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        if (ram_list.version != rs->last_version) {
            ram_state_reset(rs);
        }

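        /* Read ram_list.version before reading ram_list.blocks */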
        smp_rmb();

        ram_control_before_iterate(f, RAM_CONTROL_ROUND);

        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        i = 0;
        while ((ret = qemu_file_rate_limit(f)) == 0 ||
               postcopy_has_request(rs)) {
            int pages;

            if (qemu_file_get_error(f)) {
                break;
            }

            pages = ram_find_and_save_block(rs);

            if (pages == 0) {
                done = 1;
                break;
            }

            if (pages < 0) {
                qemu_file_set_error(f, pages);
                break;
            }

            rs->target_page_count += pages;

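            /*
             * During postcopy, it is necessary to make sure one whole host
             * page is sent in one chunk.
             */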
            if (migrate_postcopy_ram()) {
                flush_compressed_data(rs);
            }

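            /*
             * qemu_clock_get_ns() is a bit expensive, so only check the
             * elapsed time every 64 iterations.
             */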
            if ((i & 63) == 0) {
                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
                              1000000;
                if (t1 > MAX_WAIT) {
                    trace_ram_save_iterate_big_wait(t1, i);
                    break;
                }
            }
            i++;
        }
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

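    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */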
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    if (ret >= 0
        && migration_is_setup_or_active(migrate_get_current()->state)) {
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
        ram_transferred_add(8);

        ret = qemu_file_get_error(f);
    }
    if (ret < 0) {
        return ret;
    }

    return done;
}

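/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success or negative on error
 *
 * Called with the iothread lock held
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */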
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;

    rs->last_stage = !migration_in_colo_state();

    WITH_RCU_READ_LOCK_GUARD() {
        if (!migration_in_postcopy()) {
            migration_bitmap_sync_precopy(rs);
        }

        ram_control_before_iterate(f, RAM_CONTROL_FINISH);

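        /* try transferring iterative blocks of memory */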
        while (true) {
            int pages;

            pages = ram_find_and_save_block(rs);

            if (pages == 0) {
                break;
            }
            if (pages < 0) {
                ret = pages;
                break;
            }
        }

        flush_compressed_data(rs);
        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    }

    if (ret >= 0) {
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
    }

    return ret;
}

static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        WITH_RCU_READ_LOCK_GUARD() {
            migration_bitmap_sync_precopy(rs);
        }
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
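        /* We can do postcopy, and all the data is postcopiable */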
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

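    /* extract XBZRLE header */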
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    loaded_data = XBZRLE.decoded_buf;

    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}

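/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @mis: the migration incoming state pointer
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of a previous
 *         block)
 */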
static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
                                              QEMUFile *f, int flags)
{
    RAMBlock *block = mis->last_recv_block;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    if (ramblock_is_ignored(block)) {
        error_report("block %s should not be migrated!", id);
        return NULL;
    }

    mis->last_recv_block = block;

    return block;
}

static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}

static void *host_page_from_ram_block_offset(RAMBlock *block,
                                             ram_addr_t offset)
{
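    /* Note: Explicitly no check against offset_in_ramblock(). */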
    return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
                                   block->page_size);
}

static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
                                                         ram_addr_t offset)
{
    return ((uintptr_t)block->host + offset) & (block->page_size - 1);
}

static inline void *colo_cache_from_block_offset(RAMBlock *block,
                                                 ram_addr_t offset,
                                                 bool record_bitmap)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }
    if (!block->colo_cache) {
        error_report("%s: colo_cache is NULL in block: %s",
                     __func__, block->idstr);
        return NULL;
    }

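    /*
     * During a COLO checkpoint, we need a bitmap of these migrated pages.
     * It helps us decide which pages in the RAM cache should be flushed
     * into the VM's RAM later.
     */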
    if (record_bitmap &&
        !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
        ram_state->migration_dirty_pages++;
    }
    return block->colo_cache + offset;
}

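/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */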
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !buffer_is_zero(host, size)) {
        memset(host, ch, size);
    }
}

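/* return the size after decompression, or negative value on error */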
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    int err;

    err = inflateReset(stream);
    if (err != Z_OK) {
        return -1;
    }

    stream->avail_in = source_len;
    stream->next_in = (uint8_t *)source;
    stream->avail_out = dest_len;
    stream->next_out = dest;

    err = inflate(stream, Z_NO_FLUSH);
    if (err != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}

static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0 && migrate_get_current()->decompress_error_check) {
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static int wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
    return qemu_file_get_error(decomp_file);
}

static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
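        /*
         * compbuf doubles as an indicator of whether the thread was
         * properly initialized.
         */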
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}

static int compress_threads_load_setup(QEMUFile *f)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    decomp_file = f;
    for (i = 0; i < thread_count; i++) {
        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
            goto exit;
        }

        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;
exit:
    compress_threads_load_cleanup();
    return -1;
}

static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    QEMU_LOCK_GUARD(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
}

static void colo_init_ram_state(void)
{
    ram_state_init(&ram_state);
}

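/*
 * colo cache: this is for the secondary VM; we cache the whole memory of
 * the secondary VM. It is necessary to hold the global lock to call this
 * helper.
 */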
int colo_init_ram_cache(void)
{
    RAMBlock *block;

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                    NULL, false, false);
            if (!block->colo_cache) {
                error_report("%s: Can't alloc memory for COLO cache of "
                             "block %s, size 0x" RAM_ADDR_FMT, __func__,
                             block->idstr, block->used_length);
                RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                    if (block->colo_cache) {
                        qemu_anon_ram_free(block->colo_cache,
                                           block->used_length);
                        block->colo_cache = NULL;
                    }
                }
                return -errno;
            }
            if (!machine_dump_guest_core(current_machine)) {
                qemu_madvise(block->colo_cache, block->used_length,
                             QEMU_MADV_DONTDUMP);
            }
        }
    }

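    /*
     * Record the dirty pages that were sent by the PVM; this dirty bitmap
     * is used to decide which pages in the cache should be flushed into
     * the SVM's RAM later.
     */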
    if (ram_bytes_total()) {
        RAMBlock *block;

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
            block->bmap = bitmap_new(pages);
        }
    }

    colo_init_ram_state();
    return 0;
}

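/* Start dirty logging on the COLO incoming side. */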
void colo_incoming_start_dirty_log(void)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
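            /* Discard this dirty bitmap record */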
            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
        }
        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
    }
    ram_state->migration_dirty_pages = 0;
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}

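/* It is necessary to hold the global lock to call this helper */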
void colo_release_ram_cache(void)
{
    RAMBlock *block;

    memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->bmap);
        block->bmap = NULL;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            if (block->colo_cache) {
                qemu_anon_ram_free(block->colo_cache, block->used_length);
                block->colo_cache = NULL;
            }
        }
    }
    ram_state_cleanup(&ram_state);
}

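/**
 * ram_load_setup: Setup RAM for migration incoming side
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 */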
static int ram_load_setup(QEMUFile *f, void *opaque)
{
    if (compress_threads_load_setup(f)) {
        return -1;
    }

    xbzrle_load_setup();
    ramblock_recv_map_init();

    return 0;
}

static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        qemu_ram_block_writeback(rb);
    }

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }

    return 0;
}

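/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram. postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */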
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    return postcopy_ram_incoming_init(mis);
}

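/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to send the data
 */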
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matches_target_page_size = false;
    MigrationIncomingState *mis = migration_incoming_get_current();

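    /* Currently we only use channel 0.  TODO: use all the channels */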
    PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[0];

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;
        int len;

        addr = qemu_get_be64(f);

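        /*
         * If there is a qemu file error, we should stop here, and then
         * "addr" may be invalid.
         */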
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(mis, f, flags);
            if (!block) {
                ret = -EINVAL;
                break;
            }

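            /*
             * Relying on used_length is racy and can result in false
             * positives. We might place pages beyond used_length in case
             * RAM was shrunk while in postcopy, which is fine - trying to
             * place via UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
             */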
            if (!block->host || addr >= block->postcopy_length) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            tmp_page->target_pages++;
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;

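            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages,
             * however the source ensures it always sends all the components
             * of a host page in one chunk.
             */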
            page_buffer = tmp_page->tmp_huge_page +
                          host_page_offset_from_ram_block_offset(block, addr);

            if (tmp_page->target_pages == 1) {
                tmp_page->host_addr =
                    host_page_from_ram_block_offset(block, addr);
            } else if (tmp_page->host_addr !=
                       host_page_from_ram_block_offset(block, addr)) {
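                /* not the 1st target page within the host page */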
                error_report("Non-same host page detected. "
                             "Target host page %p, received host page %p "
                             "(rb %s offset 0x"RAM_ADDR_FMT" target_pages %d)",
                             tmp_page->host_addr,
                             host_page_from_ram_block_offset(block, addr),
                             block->idstr, addr, tmp_page->target_pages);
                ret = -EINVAL;
                break;
            }

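            /*
             * If it's the last part of a host page then we place the host
             * page
             */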
            if (tmp_page->target_pages ==
                (block->page_size / TARGET_PAGE_SIZE)) {
                place_needed = true;
            }
            place_source = tmp_page->tmp_huge_page;
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
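            /*
             * The memset can be skipped when this is a zero page and the
             * block's page size matches the target page size.
             */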
            if (ch || !matches_target_page_size) {
                memset(page_buffer, ch, TARGET_PAGE_SIZE);
            }
            if (ch) {
                tmp_page->all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            tmp_page->all_zero = false;
            if (!matches_target_page_size) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
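                /*
                 * For small pages that match the target page size, we
                 * avoid the qemu_file copy. Instead we directly use the
                 * buffer of QEMUFile to place the page. Note: we cannot do
                 * any QEMUFile operation before using that buffer, to make
                 * sure the buffer is still valid when placing the page.
                 */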
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            tmp_page->all_zero = false;
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, page_buffer, len);
            break;

        case RAM_SAVE_FLAG_EOS:
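            /* normal exit */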
            multifd_recv_sync_main();
            break;
        default:
            error_report("Unknown combination of migration flags: 0x%x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

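        /* Got the whole host page, wait for decompress before placing. */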
        if (place_needed) {
            ret |= wait_for_decompress_done();
        }

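        /* Detect any possible file errors */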
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            if (tmp_page->all_zero) {
                ret = postcopy_place_page_zero(mis, tmp_page->host_addr, block);
            } else {
                ret = postcopy_place_page(mis, tmp_page->host_addr,
                                          place_source, block);
            }
            place_needed = false;
            postcopy_temp_page_reset(tmp_page);
        }
    }

    return ret;
}

static bool postcopy_is_advised(void)
{
    PostcopyState ps = postcopy_state_get();
    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

static bool postcopy_is_running(void)
{
    PostcopyState ps = postcopy_state_get();
    return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
}

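/*
 * Flush content of RAM cache into SVM's memory.
 * Only flush the pages that are dirtied by PVM or SVM or both.
 */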
void colo_flush_ram_cache(void)
{
    RAMBlock *block = NULL;
    void *dst_host;
    void *src_host;
    unsigned long offset = 0;

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
        }
    }

    trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
    WITH_RCU_READ_LOCK_GUARD() {
        block = QLIST_FIRST_RCU(&ram_list.blocks);

        while (block) {
            unsigned long num = 0;

            offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
            if (!offset_in_ramblock(block,
                                    ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
                offset = 0;
                num = 0;
                block = QLIST_NEXT_RCU(block, next);
            } else {
                unsigned long i = 0;

                for (i = 0; i < num; i++) {
                    migration_bitmap_clear_dirty(ram_state, block, offset + i);
                }
                dst_host = block->host
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                src_host = block->colo_cache
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
                offset += num;
            }
        }
    }
    trace_colo_flush_ram_cache_end();
}

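/**
 * ram_load_precopy: load pages in precopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in precopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to send the data
 */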
static int ram_load_precopy(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;

    bool postcopy_advised = postcopy_is_advised();
    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL, *host_bak = NULL;
        uint8_t ch;

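        /*
         * Yield periodically to let the main loop run, but an iteration
         * of the main loop is expensive, so only do it occasionally.
         */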
        if ((i & 32767) == 0 && qemu_in_coroutine()) {
            aio_co_schedule(qemu_get_current_aio_context(),
                            qemu_coroutine_self());
            qemu_coroutine_yield();
        }
        i++;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(mis, f, flags);

            host = host_from_ram_block_offset(block, addr);

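            /*
             * After going into COLO stage, we should not load the page
             * into SVM's memory directly; we put it into colo_cache first.
             * NOTE: We need to keep a copy of SVM's ram in colo_cache.
             */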
            if (migration_incoming_colo_enabled()) {
                if (migration_incoming_in_colo_state()) {
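                    /* In COLO stage, put all pages into cache temporarily */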
                    host = colo_cache_from_block_offset(block, addr, true);
                } else {
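                    /*
                     * In migration stage but before COLO stage,
                     * put all pages into both cache and SVM's memory.
                     */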
                    host_bak = colo_cache_from_block_offset(block, addr, false);
                }
            }
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (!migration_incoming_in_colo_state()) {
                ramblock_recv_bitmap_set(block, host);
            }

            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
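            /* Synchronize RAM block list */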
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated!", id);
                    ret = -EINVAL;
                } else if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }

                    if (postcopy_advised && migrate_postcopy_ram() &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    if (migrate_ignore_shared()) {
                        hwaddr addr = qemu_get_be64(f);
                        if (ramblock_is_ignored(block) &&
                            block->mr->addr != addr) {
                            error_report("Mismatched GPAs for block %s "
                                         "%" PRId64 " != %" PRId64,
                                         id, (uint64_t)addr,
                                         (uint64_t)block->mr->addr);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
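            /* normal exit */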
            multifd_recv_sync_main();
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: 0x%x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
        if (!ret && host_bak) {
            memcpy(host_bak, host, TARGET_PAGE_SIZE);
        }
    }

    ret |= wait_for_decompress_done();
    return ret;
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int ret = 0;
    static uint64_t seq_iter;

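    /*
     * If the system is running in postcopy mode, page inserts to host
     * memory must be atomic.
     */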
    bool postcopy_running = postcopy_is_running();

    seq_iter++;

    if (version_id != 4) {
        return -EINVAL;
    }

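    /*
     * This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of these
     * critical sections.
     */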
    WITH_RCU_READ_LOCK_GUARD() {
        if (postcopy_running) {
            ret = ram_load_postcopy(f);
        } else {
            ret = ram_load_precopy(f);
        }
    }
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}

static bool ram_has_postcopy(void *opaque)
{
    RAMBlock *rb;
    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        if (ramblock_is_pmem(rb)) {
            info_report("Block: %s, host: %p is an nvdimm memory, postcopy "
                        "is not supported now!", rb->idstr, rb->host);
            return false;
        }
    }

    return migrate_postcopy_ram();
}

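/* Sync all the dirty bitmaps with the destination VM. */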
static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
{
    RAMBlock *block;
    QEMUFile *file = s->to_dst_file;
    int ramblock_count = 0;

    trace_ram_dirty_bitmap_sync_start();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        qemu_savevm_send_recv_bitmap(file, block->idstr);
        trace_ram_dirty_bitmap_request(block->idstr);
        ramblock_count++;
    }

    trace_ram_dirty_bitmap_sync_wait();

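    /* Wait until all the ramblocks' dirty bitmaps are synced */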
    while (ramblock_count--) {
        qemu_sem_wait(&s->rp_state.rp_sem);
    }

    trace_ram_dirty_bitmap_sync_complete();

    return 0;
}

static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}

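/*
 * Read the received bitmap, revert it as the initial dirty bitmap.
 * This is only used when the postcopy migration is paused but wants
 * to resume from a middle point.
 */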
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
{
    int ret = -EINVAL;

    QEMUFile *file = s->rp_state.from_dst_file;
    unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
    uint64_t local_size = DIV_ROUND_UP(nbits, 8);
    uint64_t size, end_mark;

    trace_ram_dirty_bitmap_reload_begin(block->idstr);

    if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: incorrect state %s", __func__,
                     MigrationStatus_str(s->state));
        return -EINVAL;
    }

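    /*
     * Note: see comments in ramblock_recv_bitmap_send() on why we
     * need the endianness conversion, and the paddings.
     */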
    local_size = ROUND_UP(local_size, 8);

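    /* Add paddings */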
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    size = qemu_get_be64(file);

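    /* The size of the bitmap should match with our ramblock */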
    if (size != local_size) {
        error_report("%s: ramblock '%s' bitmap size mismatch "
                     "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
                     block->idstr, size, local_size);
        ret = -EINVAL;
        goto out;
    }

    size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
    end_mark = qemu_get_be64(file);

    ret = qemu_file_get_error(file);
    if (ret || size != local_size) {
        error_report("%s: read bitmap failed for ramblock '%s': %d"
                     " (size 0x%"PRIx64", got: 0x%"PRIx64")",
                     __func__, block->idstr, ret, local_size, size);
        ret = -EIO;
        goto out;
    }

    if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                     __func__, block->idstr, end_mark);
        ret = -EINVAL;
        goto out;
    }

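    /*
     * Endianness conversion. We are during postcopy (though paused).
     * The dirty bitmap won't change. We can directly modify it.
     */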
    bitmap_from_le(block->bmap, le_bitmap, nbits);

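    /*
     * What we received is "received bitmap". Revert it as the initial
     * dirty bitmap for this ramblock.
     */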
    bitmap_complement(block->bmap, block->bmap, nbits);

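    /* Clear dirty bits of discarded ranges that we don't want to migrate. */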
    ramblock_dirty_bitmap_clear_discarded_pages(block);

    trace_ram_dirty_bitmap_reload_complete(block->idstr);

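    /*
     * We succeeded to sync bitmap for current ramblock. If this is
     * the last one to sync, we need to notify the main send thread.
     */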
    ram_dirty_bitmap_reload_notify(s);

    ret = 0;
out:
    g_free(le_bitmap);
    return ret;
}

static int ram_resume_prepare(MigrationState *s, void *opaque)
{
    RAMState *rs = *(RAMState **)opaque;
    int ret;

    ret = ram_dirty_bitmap_sync_all(s, rs);
    if (ret) {
        return ret;
    }

    ram_state_resume_prepare(rs, s->to_dst_file);

    return 0;
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
    .resume_prepare = ram_resume_prepare,
};

static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
                                      size_t old_size, size_t new_size)
{
    PostcopyState ps = postcopy_state_get();
    ram_addr_t offset;
    RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
    Error *err = NULL;

    if (ramblock_is_ignored(rb)) {
        return;
    }

    if (!migration_is_idle()) {
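        /*
         * Precopy code on the source cannot deal with the size of RAM
         * blocks changing at random points in time - especially after
         * sending the RAM block sizes in the migration stream, they must
         * no longer change. Abort and indicate a proper reason.
         */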
        error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
        migration_cancel(err);
        error_free(err);
    }

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
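        /*
         * Update what ram_postcopy_incoming_init()->init_range() does at
         * the time postcopy was advised. Syncing RAM blocks with the source
         * will result in RAM resizes.
         */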
        if (old_size < new_size) {
            if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
                error_report("RAM block '%s' discard of resized RAM failed",
                             rb->idstr);
            }
        }
        rb->postcopy_length = new_size;
        break;
    case POSTCOPY_INCOMING_NONE:
    case POSTCOPY_INCOMING_RUNNING:
    case POSTCOPY_INCOMING_END:
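        /* RAM block resizing is fine in these postcopy states; nothing to do. */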
        break;
    default:
        error_report("RAM block '%s' resized during postcopy state: %d",
                     rb->idstr, ps);
        exit(-1);
    }
}

static RAMBlockNotifier ram_mig_ram_notifier = {
    .ram_block_resized = ram_mig_ram_block_resized,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
    ram_block_notifier_add(&ram_mig_ram_notifier);
}