#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/madvise.h"
#include "qemu/main-loop.h"
#include "io/channel-null.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/cpu-throttle.h"
#include "savevm.h"
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"

#include "hw/boards.h"

#if defined(__linux__)
#include "qemu/userfaultfd.h"
#endif
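
/*
 * RAM save flags.  These are OR'ed into the low bits of the page offset
 * on the wire: pages are target-page aligned, so those bits are
 * otherwise always zero.
 */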
#define RAM_SAVE_FLAG_FULL     0x01
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40

#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

XBZRLECacheStats xbzrle_counters;

/* XBZRLE page cache and the scratch buffers used by the encoder/decoder */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, protected by lock */
    PageCache *cache;
    QemuMutex lock;
    /* a page full of zeros, used when caching zero pages */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}
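
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from migrate_params_apply in the main thread,
 * possibly while a migration is in progress.  A running migration may
 * be using the cache and might finish during this call, hence changes
 * to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */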
int xbzrle_cache_resize(uint64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)
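
/*
 * Send the received bitmap of @block_name to the source via @file, so
 * that the source knows which pages the destination has already
 * received (used during postcopy recovery).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */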
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit before hand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required when source and destination VMs are not using the
     * same endianness.
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

typedef struct {
    /*
     * Cached ramblock/offset values if preempted.  They're only meaningful
     * when preempted is true below.
     */
    RAMBlock *ram_block;
    unsigned long ram_page;
    /*
     * Whether a postcopy preemption just happened.  Will be reset after
     * precopy recovered to background migration.
     */
    bool preempted;
} PostcopyPreemptState;

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* UFFD file descriptor, used in 'write-tracking' migration */
    int uffdio_fd;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;
    /* Start using XBZRLE (e.g., after the first round). */
    bool xbzrle_enabled;
    /* Are we on the last stage of migration */
    bool last_stage;
    /* compression statistics since the beginning of the period */
    /* amount of count that no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;

    /* Postcopy preemption state */
    PostcopyPreemptState postcopy_preempt_state;
    /*
     * Current channel we're using on the source VM.  Only valid if
     * postcopy-preempt is enabled.
     */
    unsigned int postcopy_channel;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

static void postcopy_preempt_reset(RAMState *rs)
{
    memset(&rs->postcopy_preempt_state, 0, sizeof(PostcopyPreemptState));
}

/* Whether postcopy has queued requests? */
static bool postcopy_has_request(RAMState *rs)
{
    return !QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests);
}

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

static void ram_transferred_add(uint64_t bytes)
{
    if (runstate_is_running()) {
        ram_counters.precopy_bytes += bytes;
    } else if (migration_in_postcopy()) {
        ram_counters.postcopy_bytes += bytes;
    } else {
        ram_counters.downtime_bytes += bytes;
    }
    ram_counters.transferred += bytes;
}

void dirty_sync_missed_zero_copy(void)
{
    ram_counters.dirty_sync_missed_zero_copy++;
}

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
    /*
     * [POSTCOPY-ONLY] Whether current page is explicitly requested by
     * postcopy.  When set, the request is "urgent" because the dest QEMU
     * threads are waiting for us.
     */
    bool         postcopy_requested;
    /*
     * [POSTCOPY-ONLY] The target channel to use to send current page.
     *
     * Note: This may _not_ match with the value in postcopy_requested
     * above.  If a postcopy request hits exactly the page that the
     * precopy stream was in the middle of sending, the page must still
     * go out on the precopy channel, so that the destination never sees
     * two halves of the same huge page on different channels.
     */
    bool         postcopy_target_channel;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/*
 * comp_done_cond is used to wake up the migration thread when one of the
 * compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss,
                                     bool postcopy_requested);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /*
         * comp_param[i].file is just used as a dummy buffer to save data,
         * so a null output channel is enough.
         */
        comp_param[i].file = qemu_file_new_output(
            QIO_CHANNEL(qio_channel_null_new()));
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
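
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */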
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
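
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce the amount of guest CPU execution to hopefully slow down memory
 * writes.  If the guest dirty rate drops below the rate at which we can
 * transfer pages to the destination, the migration can converge.
 *
 * @bytes_dirty_period: number of bytes dirtied during the last period
 * @bytes_dirty_threshold: the dirty threshold for this period
 */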
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /*
             * Compute the ideal CPU percentage used by the guest, which
             * may make the dirty rate match the dirty rate threshold.
             */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                        bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}

void mig_throttle_counter_reset(void)
{
    RAMState *rs = ram_state;

    rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    rs->num_dirty_pages_period = 0;
    rs->bytes_xfer_prev = ram_counters.transferred;
}
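
/**
 * xbzrle_cache_zero_page: insert a zero page into the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */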
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (!rs->xbzrle_enabled) {
        return;
    }

    /*
     * We don't care if this fails to allocate a new cache page as long
     * as an old one is updated.
     */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1
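
/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */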
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!rs->last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /*
                 * Update *current_data when the page has been inserted
                 * into the cache.
                 */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB/2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * just sent.
     */
    if (!rs->last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;

    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the 8-byte page header is not counted.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_transferred_add(bytes_xbzrle);

    return 1;
}
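
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Returns the page offset within the RAMBlock for the start of the next
 * dirty page, or the end of the block if none is dirty
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */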
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    return find_next_bit(bitmap, size, start);
}

static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * A shift of at least 6 keeps the start address of a chunk aligned
     * to 64 pages, so the clear_bmap stays aligned to unsigned long.
     * CLEAR_BITMAP_SHIFT_MIN should already guarantee this.
     */
    assert(shift >= 6);

    size = 1ULL << (TARGET_PAGE_BITS + shift);
    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}

static void
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
    unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);

    /*
     * Clear pages from start to start + npages - 1, so the end boundary
     * is exclusive.
     */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rb, i);
    }
}
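
/**
 * colo_bitmap_find_dirty: find the next dirty chunk from start
 *
 * Returns the page offset within the RAMBlock of the start of the dirty
 * chunk, or the end of the block if none is found
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 * @num: used to return the number of contiguous dirty pages found
 */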
static inline
unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                     unsigned long start, unsigned long *num)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long first, next;

    *num = 0;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    first = find_next_bit(bitmap, size, start);
    if (first >= size) {
        return first;
    }
    next = find_next_zero_bit(bitmap, size, first + 1);
    assert(next >= first);
    *num = next - first;
    return first;
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    /*
     * Clear dirty bitmap if needed.  This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time.  So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    migration_clear_memory_region_dirty_bitmap(rb, page);

    ret = test_and_clear_bit(page, rb->bmap);
    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}

static void dirty_bitmap_clear_section(MemoryRegionSection *section,
                                       void *opaque)
{
    const hwaddr offset = section->offset_within_region;
    const hwaddr size = int128_get64(section->size);
    const unsigned long start = offset >> TARGET_PAGE_BITS;
    const unsigned long npages = size >> TARGET_PAGE_BITS;
    RAMBlock *rb = section->mr->ram_block;
    uint64_t *cleared_bits = opaque;

    /*
     * We don't grab ram_state->bitmap_mutex because we expect to run
     * only when starting migration or during postcopy recovery where
     * we don't have concurrent access.
     */
    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
    }
    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
    bitmap_clear(rb->bmap, start, npages);
}
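
/*
 * Exclude all dirty pages from migration that fall into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region of
 * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
 *
 * Discarded pages ("logically unplugged") have undefined content and must
 * not get migrated, because even reading these pages for migration might
 * result in undesired behavior.
 *
 * Returns the number of cleared bits in the RAMBlock dirty bitmap.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */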
static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
{
    uint64_t cleared_bits = 0;

    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = int128_make64(qemu_ram_get_used_length(rb)),
        };

        ram_discard_manager_replay_discarded(rdm, &section,
                                             dirty_bitmap_clear_section,
                                             &cleared_bits);
    }
    return cleared_bits;
}
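
/*
 * Check if a (host-page aligned) page falls into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region
 * of the RAMBlock.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */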
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
{
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = start,
            .size = int128_make64(qemu_ram_pagesize(rb)),
        };

        return !ram_discard_manager_is_populated(rdm, &section);
    }
    return false;
}

/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}
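
/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */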
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}

uint64_t ram_get_total_transferred_pages(void)
{
    return ram_counters.normal + ram_counters.duplicate +
           compression_counters.pages + xbzrle_counters.pages;
}

static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}

static void migration_trigger_throttle(RAMState *rs)
{
    MigrationState *s = migrate_get_current();
    uint64_t threshold = s->parameters.throttle_trigger_threshold;

    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;

    /*
     * During block migration the auto-converge logic incorrectly detects
     * that ram migration makes no progress. Avoid this by disabling the
     * throttling logic during the bulk phase of block migration.
     */
    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
        /*
         * Check to see if the ratio between dirtied bytes and the
         * approximate amount of bytes that just got transferred since the
         * last time we were in this routine reaches the threshold. If
         * that happens twice, start or increase throttling.
         */
        if ((bytes_dirty_period > bytes_dirty_threshold) &&
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}

static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration, so we
     * don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}

static void ram_release_page(const char *rbname, uint64_t offset)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
}
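
/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is not
 * a zero page
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */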
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(file, 0);
        len += 1;
        ram_release_page(block->idstr, offset);
    }
    return len;
}
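
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */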
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    int len = save_zero_page_to_file(rs, rs->f, block, offset);

    if (len) {
        ram_counters.duplicate++;
        ram_transferred_add(len);
        return 1;
    }
    return -1;
}
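
/*
 * @pages: the number of pages written by the control path,
 *        < 0 - error
 *        > 0 - number of pages written
 *
 * Return true if the pages has been saved, otherwise false is returned.
 */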
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_transferred_add(bytes_xmit);
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}
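
/*
 * directly send the page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @buf: the page to be sent
 * @async: send the page asynchronously
 */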
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                            uint8_t *buf, bool async)
{
    ram_transferred_add(save_page_header(rs, rs->f, block,
                                         offset | RAM_SAVE_FLAG_PAGE));
    if (async) {
        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
                              migrate_release_ram() &&
                              migration_in_postcopy());
    } else {
        qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
    }
    ram_transferred_add(TARGET_PAGE_SIZE);
    ram_counters.normal++;
    return 1;
}
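
/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 */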
static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (rs->xbzrle_enabled && !migration_in_postcopy()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset);
        if (!rs->last_stage) {
            /*
             * Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                 ram_addr_t offset)
{
    if (multifd_queue_page(rs->f, block, offset) < 0) {
        return -1;
    }
    ram_counters.normal++;

    return 1;
}

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + offset;
    int ret;

    if (save_zero_page_to_file(rs, f, block, offset)) {
        return true;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * copy it to an internal buffer to avoid it being modified by the
     * guest, so that we can catch the error during compression and
     * decompression
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
    }
    return false;
}

static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_transferred_add(bytes_xmit);

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}

static bool save_page_use_compression(RAMState *rs);

static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * it's safe to fetch zero_page without holding comp_done_lock
             * as there is no further request submitted to the thread,
             * i.e, the thread should be waiting for a request at this point.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * wait for a free thread if the user specifies 'compress-wait-thread',
     * otherwise we will post the page out in the main thread as normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
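
/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns true if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */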
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    /*
     * This is not a postcopy requested page, mark it "not urgent", and
     * use the precopy channel to send it.
     */
    pss->postcopy_requested = false;
    pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY;

    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (!offset_in_ramblock(pss->block,
                            ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /*
             * If memory migration starts over, we will meet a dirtied page
             * which may still exist in the compression threads' ring, so
             * we should flush the compressed data to make sure the new page
             * is not overwritten by the old one in the destination.
             *
             * Also, if xbzrle is on, stop using data compression at this
             * point. In theory, xbzrle can do better than compression.
             */
            flush_compressed_data(rs);

            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            /* After the first round, enable XBZRLE. */
            if (migrate_use_xbzrle()) {
                rs->xbzrle_enabled = true;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}
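
/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */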
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    struct RAMSrcPageRequest *entry;
    RAMBlock *block = NULL;

    if (!postcopy_has_request(rs)) {
        return NULL;
    }

    QEMU_LOCK_GUARD(&rs->src_page_req_mutex);

    /*
     * This should _never_ change even after we take the lock, because no one
     * should be taking anything off the request list other than us.
     */
    assert(postcopy_has_request(rs));

    entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
    block = entry->rb;
    *offset = entry->offset;

    if (entry->len > TARGET_PAGE_SIZE) {
        entry->len -= TARGET_PAGE_SIZE;
        entry->offset += TARGET_PAGE_SIZE;
    } else {
        memory_region_unref(block->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(entry);
        migration_consume_urgent_request();
    }

    return block;
}

#if defined(__linux__)
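/**
 * poll_fault_page: try to get next UFFD write fault page and, if pending fault
 *   is found, return RAM block pointer and page offset
 *
 * Returns pointer to the RAMBlock containing faulting page,
 *   NULL if no write faults are pending
 *
 * @rs: current RAM state
 * @offset: page offset from the beginning of the block
 */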
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    struct uffd_msg uffd_msg;
    void *page_address;
    RAMBlock *block;
    int res;

    if (!migrate_background_snapshot()) {
        return NULL;
    }

    res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
    if (res <= 0) {
        return NULL;
    }

    page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
    block = qemu_ram_block_from_host(page_address, false, offset);
    assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
    return block;
}
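
/**
 * ram_save_release_protection: release UFFD write protection after
 *   a range of pages has been saved
 *
 * @rs: current RAM state
 * @pss: page-search-status structure
 * @start_page: index of the first page in the range relative to pss->block
 *
 * Returns 0 on success, negative value in case of an error
 */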
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
                                       unsigned long start_page)
{
    int res = 0;

    /* Check if page is from UFFD-managed region. */
    if (pss->block->flags & RAM_UF_WRITEPROTECT) {
        void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
        uint64_t run_length = (pss->page - start_page) << TARGET_PAGE_BITS;

        /* Flush async buffers before un-protect. */
        qemu_fflush(rs->f);
        /* Un-protect memory range. */
        res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
                                     false, false);
    }

    return res;
}
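
/*
 * ram_write_tracking_available: check if kernel supports required UFFD features
 *
 * Returns true if supports, false otherwise
 */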
bool ram_write_tracking_available(void)
{
    uint64_t uffd_features;
    int res;

    res = uffd_query_features(&uffd_features);
    return (res == 0 &&
            (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
}
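
/*
 * ram_write_tracking_compatible: check if guest configuration is
 *   compatible with 'write-tracking'
 *
 * Returns true if compatible, false otherwise
 */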
bool ram_write_tracking_compatible(void)
{
    const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
    int uffd_fd;
    RAMBlock *block;
    bool ret = false;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
    if (uffd_fd < 0) {
        return false;
    }

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        uint64_t uffd_ioctls;

        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }
        /* Try to register block memory via UFFD-IO to track writes */
        if (uffd_register_memory(uffd_fd, block->host, block->max_length,
                UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
            goto out;
        }
        if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
            goto out;
        }
    }
    ret = true;

out:
    uffd_close_fd(uffd_fd);
    return ret;
}

static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
                                       ram_addr_t size)
{
    const ram_addr_t end = offset + size;

    /*
     * We read one byte of each page; this will preallocate page tables if
     * required and populate the shared zeropage on MAP_PRIVATE anonymous
     * memory where no page was populated yet.
     */
    for (; offset < end; offset += block->page_size) {
        char tmp = *((char *)block->host + offset);

        /* Don't optimize the read out */
        asm volatile("" : "+r" (tmp));
    }
}

static inline int populate_read_section(MemoryRegionSection *section,
                                        void *opaque)
{
    const hwaddr size = int128_get64(section->size);
    hwaddr offset = section->offset_within_region;
    RAMBlock *block = section->mr->ram_block;

    populate_read_range(block, offset, size);
    return 0;
}
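
/*
 * ram_block_populate_read: preallocate page tables and populate pages in the
 *   RAM block by reading a byte of each page.
 *
 * @block: RAM block to populate
 */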
static void ram_block_populate_read(RAMBlock *rb)
{
    /*
     * Skip populating all pages that fall into a discarded range as managed by
     * a RamDiscardManager responsible for the mapped memory region of the
     * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
     * must not get populated automatically. We don't have to track
     * modifications via userfaultfd WP reliably, because these pages will
     * not be part of the migration stream either way -- see
     * ramblock_dirty_bitmap_clear_discarded_pages().
     *
     * Note: The result is only stable while migrating (precopy/postcopy).
     */
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = rb->mr->size,
        };

        ram_discard_manager_replay_populated(rdm, &section,
                                             populate_read_section, NULL);
    } else {
        populate_read_range(rb, 0, rb->used_length);
    }
}

/*
 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
 */
void ram_write_tracking_prepare(void)
{
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /*
         * Populate pages of the RAM block before enabling userfault_fd
         * write protection: the pages must be resident before they can
         * be write-protected, and populating them here avoids taking
         * extra faults later while the tracking is active.
         */
        ram_block_populate_read(block);
    }
}
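
/*
 * ram_write_tracking_start: start UFFD-WP memory tracking
 *
 * Returns 0 for success or negative value in case of error
 */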
int ram_write_tracking_start(void)
{
    int uffd_fd;
    RAMState *rs = ram_state;
    RAMBlock *block;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
    if (uffd_fd < 0) {
        return uffd_fd;
    }
    rs->uffdio_fd = uffd_fd;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /* Register block memory with UFFD to track writes */
        if (uffd_register_memory(rs->uffdio_fd, block->host,
                block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
            goto fail;
        }
        /* Apply UFFD write protection to the block memory range */
        if (uffd_change_protection(rs->uffdio_fd, block->host,
                block->max_length, true, false)) {
            goto fail;
        }
        block->flags |= RAM_UF_WRITEPROTECT;
        memory_region_ref(block->mr);

        trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
                block->host, block->max_length);
    }

    return 0;

fail:
    error_report("ram_write_tracking_start() failed: restoring initial memory state");

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /*
         * In case some memory block failed to be write-protected
         * remove protection and unregister all succeeded RAM blocks
         */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
        /* Cleanup flags and remove reference */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    uffd_close_fd(uffd_fd);
    rs->uffdio_fd = -1;
    return -1;
}

/*
 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
 */
void ram_write_tracking_stop(void)
{
    RAMState *rs = ram_state;
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /* Remove protection and unregister all affected RAM blocks */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);

        trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
                block->host, block->max_length);

        /* Cleanup flags and remove reference */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    /* Finally close UFFD file descriptor */
    uffd_close_fd(rs->uffdio_fd);
    rs->uffdio_fd = -1;
}

#else
/* No target OS support, stubs just fail or ignore */

static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    (void) rs;
    (void) offset;

    return NULL;
}

static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
                                       unsigned long start_page)
{
    (void) rs;
    (void) pss;
    (void) start_page;

    return 0;
}

bool ram_write_tracking_available(void)
{
    return false;
}

bool ram_write_tracking_compatible(void)
{
    assert(0);
    return false;
}

int ram_write_tracking_start(void)
{
    assert(0);
    return -1;
}

void ram_write_tracking_stop(void)
{
    assert(0);
}
#endif /* defined(__linux__) */

/*
 * Check whether two addr/offset of the ramblock fall onto the same host huge
 * page.  Returns true if so, false otherwise.
 */
static bool offset_on_same_huge_page(RAMBlock *rb, uint64_t addr1,
                                     uint64_t addr2)
{
    size_t page_size = qemu_ram_pagesize(rb);

    addr1 = ROUND_DOWN(addr1, page_size);
    addr2 = ROUND_DOWN(addr2, page_size);

    return addr1 == addr2;
}

/*
 * Whether a previously preempted precopy huge page contains the currently
 * requested page?  Returns true if so, false otherwise.
 *
 * This should happen very rarely: it means that while sending during
 * background migration for postcopy we were sending exactly the page that
 * some vcpu faulted on at the destination.  When it happens we can simply
 * resume the preempted precopy page, because the most important constraint
 * is that a partially-sent huge page must _never_ be continued on the
 * postcopy channel; it can only be completed on the precopy channel.
 */
static bool postcopy_preempted_contains(RAMState *rs, RAMBlock *block,
                                        ram_addr_t offset)
{
    PostcopyPreemptState *state = &rs->postcopy_preempt_state;

    /* No preemption at all? */
    if (!state->preempted) {
        return false;
    }

    /* Not even the same ramblock? */
    if (state->ram_block != block) {
        return false;
    }

    return offset_on_same_huge_page(block, offset,
                                    state->ram_page << TARGET_PAGE_BITS);
}
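
/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns true if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */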
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, so we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                                                page);
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (block) {
        /* See comment above postcopy_preempted_contains() */
        if (postcopy_preempted_contains(rs, block, offset)) {
            trace_postcopy_preempt_hit(block->idstr, offset);
            /*
             * If what we preempted previously is exactly what we're
             * requesting right now, restore the preempted precopy
             * immediately, boosting its priority as it's requested by
             * postcopy.
             */
            postcopy_preempt_restore(rs, pss, true);
            return true;
        }
    } else {
        /*
         * Poll write faults too if background snapshot is enabled; that's
         * when we have vcpus blocked by write-protected pages.
         */
        block = poll_fault_page(rs, &offset);
    }

    if (block) {
        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;

        /*
         * This unqueued page would break the "one round" check, even if
         * that is really rare.
         */
        pss->complete_round = false;
        /* Mark it an urgent request, meanwhile using POSTCOPY channel */
        pss->postcopy_requested = true;
        pss->postcopy_target_channel = RAM_CHANNEL_POSTCOPY;
    }

    return !!block;
}
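
/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  In case there is any page left, we drop it.
 */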
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /*
     * This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    RCU_READ_LOCK_GUARD();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
}
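
/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @rbname: Name of the RAMBLock of the request. NULL means the
 *          same that last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */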
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    RCU_READ_LOCK_GUARD();

    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            return -1;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            return -1;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (!offset_in_ramblock(ramblock, start + len - 1)) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        return -1;
    }

    struct RAMSrcPageRequest *new_entry =
        g_new0(struct RAMSrcPageRequest, 1);
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    migration_make_urgent_request();
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;
}

static bool save_page_use_compression(RAMState *rs)
{
    if (!migrate_use_compression()) {
        return false;
    }

    /*
     * If xbzrle is enabled (e.g., after the first round of migration), stop
     * using data compression.  In theory, xbzrle can do better than
     * compression.
     */
    if (rs->xbzrle_enabled) {
        return false;
    }

    return true;
}

/*
 * try to compress the page before posting it out, return true if the page
 * has been properly handled by compression, otherwise needs other
 * paths to handle it
 */
static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    if (!save_page_use_compression(rs)) {
        return false;
    }

    /*
     * When starting the process of a new block, the first page of
     * the block should be sent out before other pages in the same
     * block, and all the pages in the last block should have been sent
     * out.  Keeping this order is important, because the 'cont' flag
     * is used to avoid resending the block name.
     *
     * We post the first page as a normal page as compression will take
     * much CPU resource.
     */
    if (block != rs->last_sent_block) {
        flush_compressed_data(rs);
        return false;
    }

    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
        return true;
    }

    compression_counters.busy++;
    return false;
}
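
/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 */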
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    int res;

    if (control_save_page(rs, block, offset, &res)) {
        return res;
    }

    if (save_compress_page(rs, block, offset)) {
        return 1;
    }

    res = save_zero_page(rs, block, offset);
    if (res > 0) {
        /*
         * Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale.
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        }
        return res;
    }

    /*
     * Do not use multifd for:
     * 1. Compression as the first page in the new block should be posted out
     *    before sending the compressed page
     * 2. In postcopy as one whole host page should be placed
     */
    if (migrate_use_multifd() && !migration_in_postcopy()) {
        return ram_save_multifd_page(rs, block, offset);
    }

    return ram_save_page(rs, pss);
}

static bool postcopy_needs_preempt(RAMState *rs, PageSearchStatus *pss)
{
    MigrationState *ms = migrate_get_current();

    /* Not enabled eventually */
    if (!migrate_postcopy_preempt()) {
        return false;
    }

    /* Preemption of huge pages explicitly disabled? */
    if (!ms->postcopy_preempt_break_huge) {
        return false;
    }

    /* If the block uses small pages, preemption buys us nothing */
    if (qemu_ram_pagesize(pss->block) == TARGET_PAGE_SIZE) {
        return false;
    }

    /* Not in postcopy at all? */
    if (!migration_in_postcopy()) {
        return false;
    }

    /*
     * If we're already handling a postcopy request, don't preempt as this
     * page has got the same high priority.
     */
    if (pss->postcopy_requested) {
        return false;
    }

    /* If there are postcopy requests, then check it up! */
    return postcopy_has_request(rs);
}

/* Cache the current page into the preempt state, then preempt precopy */
static void postcopy_do_preempt(RAMState *rs, PageSearchStatus *pss)
{
    PostcopyPreemptState *p_state = &rs->postcopy_preempt_state;

    trace_postcopy_preempt_triggered(pss->block->idstr, pss->page);

    /*
     * Time to preempt precopy. Cache current PSS into preempt state, so that
     * after handling the postcopy pages we can recover to it.  We need to do
     * so because the dest VM will have partial of the precopy huge page kept
     * over in its tmp huge page caches; better move on with it when we can.
     */
    p_state->ram_block = pss->block;
    p_state->ram_page = pss->page;
    p_state->preempted = true;
}

/* Whether we're preempted by a postcopy request during sending a huge page */
static bool postcopy_preempt_triggered(RAMState *rs)
{
    return rs->postcopy_preempt_state.preempted;
}

static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss,
                                     bool postcopy_requested)
{
    PostcopyPreemptState *state = &rs->postcopy_preempt_state;

    assert(state->preempted);

    pss->block = state->ram_block;
    pss->page = state->ram_page;

    /* Whether this is a postcopy request? */
    pss->postcopy_requested = postcopy_requested;
    /*
     * When restoring a preempted page, the old data (before the preempt)
     * should always be sent via the precopy channel.
     */
    pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY;

    trace_postcopy_preempt_restored(pss->block->idstr, pss->page);

    /* Reset preempt state, most importantly, set preempted==false */
    postcopy_preempt_reset(rs);
}

static void postcopy_preempt_choose_channel(RAMState *rs, PageSearchStatus *pss)
{
    MigrationState *s = migrate_get_current();
    unsigned int channel = pss->postcopy_target_channel;
    QEMUFile *next;

    if (channel != rs->postcopy_channel) {
        if (channel == RAM_CHANNEL_PRECOPY) {
            next = s->to_dst_file;
        } else {
            next = s->postcopy_qemufile_src;
        }
        /* Update and cache the current channel */
        rs->f = next;
        rs->postcopy_channel = channel;

        /*
         * If channel switched, reset last_sent_block since the old sent block
         * may not be on the same channel.
         */
        rs->last_sent_block = NULL;

        trace_postcopy_preempt_switch_channel(channel);
    }

    trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page);
}

/* We need to make sure rs->f always points to the default channel elsewhere */
static void postcopy_preempt_reset_channel(RAMState *rs)
{
    if (migrate_postcopy_preempt() && migration_in_postcopy()) {
        rs->postcopy_channel = RAM_CHANNEL_PRECOPY;
        rs->f = migrate_get_current()->to_dst_file;
        trace_postcopy_preempt_reset_channel();
    }
}
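
/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at *offset send pages up to the end of the current host
 * page. It's valid for the initial offset to point into the middle of
 * a host page in which case the remainder of the hostpage is sent.
 * Only dirty target pages are sent. Note that the host page size may
 * be a huge page for this block.
 * The saving stops at the boundary of the used_length of the block
 * if the RAMBlock isn't a multiple of the host page size.
 *
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 */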
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
{
    int tmppages, pages = 0;
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
    unsigned long hostpage_boundary =
        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
    unsigned long start_page = pss->page;
    int res;

    if (ramblock_is_ignored(pss->block)) {
        error_report("block %s should not be migrated !", pss->block->idstr);
        return 0;
    }

    if (migrate_postcopy_preempt() && migration_in_postcopy()) {
        postcopy_preempt_choose_channel(rs, pss);
    }

    do {
        if (postcopy_needs_preempt(rs, pss)) {
            postcopy_do_preempt(rs, pss);
            break;
        }

        /* Check if the page is dirty and if it is send it */
        if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
            tmppages = ram_save_target_page(rs, pss);
            if (tmppages < 0) {
                return tmppages;
            }

            pages += tmppages;
            /*
             * Allow rate limiting to happen in the middle of huge pages if
             * something is sent in the current iteration.
             */
            if (pagesize_bits > 1 && tmppages > 0) {
                migration_rate_limit();
            }
        }
        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    } while ((pss->page < hostpage_boundary) &&
             offset_in_ramblock(pss->block,
                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
    /* The offset we leave with is the min boundary of host page and block */
    pss->page = MIN(pss->page, hostpage_boundary);

    /*
     * When with postcopy preempt mode, flush the data as soon as possible for
     * postcopy requests, because we've already sent a whole huge page, so the
     * dst node should already have enough resource to atomically fill in
     * the current missing page.
     *
     * More importantly, when using a separate postcopy channel, we must do
     * an early flush or the postcopy requests could be buffered in the
     * channel and not delivered in time even if the requested page was sent.
     */
    if (migrate_postcopy_preempt() && pss->postcopy_requested) {
        qemu_fflush(rs->f);
    }

    res = ram_save_release_protection(rs, pss, start_page);
    return (res < 0 ? res : pages);
}
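
/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns the number of pages written where zero means no dirty pages,
 * or negative on error
 *
 * @rs: current RAM state
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */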
static int ram_find_and_save_block(RAMState *rs)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {
        return pages;
    }

    /*
     * Always keep last_seen_block/last_page valid during this procedure,
     * because find_dirty_block() relies on these values (e.g., we compare
     * last_seen_block with pss.block to see whether we searched all the
     * ramblocks) to detect the completion of a scan round.
     */
    if (!rs->last_seen_block) {
        rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
        rs->last_page = 0;
    }

    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;

    do {
        again = true;
        found = get_queued_page(rs, &pss);

        if (!found) {
            /*
             * Recover previous precopy ramblock/offset if postcopy has
             * preempted precopy.  Otherwise find the next dirty bit.
             */
            if (postcopy_preempt_triggered(rs)) {
                postcopy_preempt_restore(rs, &pss, false);
                found = true;
            } else {
                /* priority queue empty, so just search for something dirty */
                found = find_dirty_block(rs, &pss, &again);
            }
        }

        if (found) {
            pages = ram_save_host_page(rs, &pss);
        }
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;

    return pages;
}

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;

    if (zero) {
        ram_counters.duplicate += pages;
    } else {
        ram_counters.normal += pages;
        ram_transferred_add(size);
        qemu_file_credit_transfer(f, size);
    }
}

static uint64_t ram_bytes_total_common(bool count_ignored)
{
    RAMBlock *block;
    uint64_t total = 0;

    RCU_READ_LOCK_GUARD();

    if (count_ignored) {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            total += block->used_length;
        }
    } else {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            total += block->used_length;
        }
    }
    return total;
}

uint64_t ram_bytes_total(void)
{
    return ram_bytes_total_common(false);
}

static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}

static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}

static void ram_state_cleanup(RAMState **rsp)
{
    if (*rsp) {
        migration_page_queue_free(*rsp);
        qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
        qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
        g_free(*rsp);
        *rsp = NULL;
    }
}

static void xbzrle_cleanup(void)
{
    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.zero_target_page);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
        XBZRLE.zero_target_page = NULL;
    }
    XBZRLE_cache_unlock();
}

static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* We don't use dirty log with background snapshots */
    if (!migrate_background_snapshot()) {
        /*
         * The caller holds the iothread lock or is in a bh, so there is
         * no writing race against the migration bitmap.
         */
        if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
            /*
             * do not stop dirty log without starting it, since
             * memory_global_dirty_log_stop will assert that
             * memory_global_dirty_log_start/stop are used in pairs
             */
            memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
        }
    }

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->clear_bmap);
        block->clear_bmap = NULL;
        g_free(block->bmap);
        block->bmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}

static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    rs->xbzrle_enabled = false;
    postcopy_preempt_reset(rs);
    rs->postcopy_channel = RAM_CHANNEL_PRECOPY;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

/* **** functions for postcopy ***** */

/*
 * Discard, on the source, RAM pages that have already been sent to the
 * destination (their dirty bits are clear), releasing the memory while
 * postcopy is running.
 */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr,
                              ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
                              ((ram_addr_t)(run_end - run_start))
                                << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}
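
/**
 * postcopy_send_discard_bm_ram: discard a RAMBlock
 *
 * Callers must be holding the RCU read lock.
 *
 * @ms: current migration state
 * @block: RAMBlock to discard
 */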
static void postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
{
    unsigned long end = block->used_length >> TARGET_PAGE_BITS;
    unsigned long current;
    unsigned long *bitmap = block->bmap;

    for (current = 0; current < end; ) {
        unsigned long one = find_next_bit(bitmap, end, current);
        unsigned long zero, discard_length;

        if (one >= end) {
            break;
        }

        zero = find_next_zero_bit(bitmap, end, one + 1);

        if (zero >= end) {
            discard_length = end - one;
        } else {
            discard_length = zero - one;
        }
        postcopy_discard_send_range(ms, one, discard_length);
        current = one + discard_length;
    }
}

static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block);
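
/**
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 *
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 *
 * @ms: current migration state
 */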
static void postcopy_each_ram_send_discard(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        postcopy_discard_send_init(ms, block->idstr);

        /*
         * Deal with TPS != HPS and huge pages.  It discards any partially
         * sent host-page size chunks and marks any partially dirty
         * host-page size chunks as all dirty.  In this case the host-page
         * is the host-page for the particular RAMBlock, i.e. it might be
         * a huge page.
         */
        postcopy_chunk_hostpages_pass(ms, block);

        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */
        postcopy_send_discard_bm_ram(ms, block);
        postcopy_discard_send_finish(ms);
    }
}
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
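/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Postcopy requires that all target pages in a host page are dirty or
 * clean, not a mix. This function canonicalizes the bitmap by marking
 * any partially dirty host page as fully dirty.
 *
 * @ms: current migration state
 * @block: block that contains the pages we want to canonicalize
 */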
static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    /* Find a dirty page */
    run_start = find_next_bit(bitmap, pages, 0);

    while (run_start < pages) {

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fix up this host page.
         */
        if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
            /* Find the end of this run */
            run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
        }

        if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
            unsigned long page;
            unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
                                                             host_ratio);
            run_start = QEMU_ALIGN_UP(run_start, host_ratio);

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        /* Find the next dirty page for the next iteration */
        run_start = find_next_bit(bitmap, pages, run_start);
    }
}

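/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Transmit the set of pages to be discarded after precopy to the target.
 * These are pages that have been previously transmitted but are now
 * dirty again, or pages that have never been transmitted at all.
 *
 * @ms: current migration state
 */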
void ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    RAMState *rs = ram_state;

    RCU_READ_LOCK_GUARD();

    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;

    postcopy_each_ram_send_discard(ms);

    trace_ram_postcopy_send_discard_bitmap();
}

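/**
 * ram_discard_range: discard a range of pages in a RAMBlock
 *
 * Returns zero on success, -1 if the block cannot be found.
 *
 * @rbname: name of the RAMBlock
 * @start: byte offset of the start of the range within the block
 * @length: length of the range in bytes
 */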
int ram_discard_range(const char *rbname, uint64_t start, size_t length)
{
    trace_ram_discard_range(rbname, start, length);

    RCU_READ_LOCK_GUARD();
    RAMBlock *rb = qemu_ram_block_by_name(rbname);

    if (!rb) {
        error_report("ram_discard_range: Failed to find block '%s'", rbname);
        return -1;
    }

    /*
     * On the source VM there is no received bitmap to update, so only
     * clear it when it exists (i.e. on the destination).
     */
    if (rb->receivedmap) {
        bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
                     length >> qemu_target_page_bits());
    }

    return ram_block_discard_range(rb, start, length);
}

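/*
 * Allocate the XBZRLE cache and its working buffers. All allocations
 * use the g_try_* variants so that a failure aborts XBZRLE setup with
 * -ENOMEM instead of crashing the VM.
 */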
static int xbzrle_init(void)
{
    Error *local_err = NULL;

    if (!migrate_use_xbzrle()) {
        return 0;
    }

    XBZRLE_cache_lock();

    XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.zero_target_page) {
        error_report("%s: Error allocating zero page", __func__);
        goto err_out;
    }

    XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
                              TARGET_PAGE_SIZE, &local_err);
    if (!XBZRLE.cache) {
        error_report_err(local_err);
        goto free_zero_page;
    }

    XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.encoded_buf) {
        error_report("%s: Error allocating encoded_buf", __func__);
        goto free_cache;
    }

    XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
    if (!XBZRLE.current_buf) {
        error_report("%s: Error allocating current_buf", __func__);
        goto free_encoded_buf;
    }

    /* We are all good */
    XBZRLE_cache_unlock();
    return 0;

free_encoded_buf:
    g_free(XBZRLE.encoded_buf);
    XBZRLE.encoded_buf = NULL;
free_cache:
    cache_fini(XBZRLE.cache);
    XBZRLE.cache = NULL;
free_zero_page:
    g_free(XBZRLE.zero_target_page);
    XBZRLE.zero_target_page = NULL;
err_out:
    XBZRLE_cache_unlock();
    return -ENOMEM;
}

static int ram_state_init(RAMState **rsp)
{
    *rsp = g_try_new0(RAMState, 1);

    if (!*rsp) {
        error_report("%s: Init ramstate fail", __func__);
        return -1;
    }

    qemu_mutex_init(&(*rsp)->bitmap_mutex);
    qemu_mutex_init(&(*rsp)->src_page_req_mutex);
    QSIMPLEQ_INIT(&(*rsp)->src_page_requests);

    /*
     * Count the total number of pages used by RAM blocks not including
     * any gaps due to alignment or unplugs. This must match the initial
     * value of the dirty bitmap, which is set to all 1s and cleared
     * lazily later during migration.
     */
    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
    ram_state_reset(*rsp);

    return 0;
}

static void ram_list_init_bitmaps(void)
{
    MigrationState *ms = migrate_get_current();
    RAMBlock *block;
    unsigned long pages;
    uint8_t shift;

    /* Skip setting bitmap if there is no RAM */
    if (ram_bytes_total()) {
        shift = ms->clear_bitmap_shift;
        if (shift > CLEAR_BITMAP_SHIFT_MAX) {
            error_report("clear_bitmap_shift (%u) too big, using "
                         "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
            shift = CLEAR_BITMAP_SHIFT_MAX;
        } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
            error_report("clear_bitmap_shift (%u) too small, using "
                         "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
            shift = CLEAR_BITMAP_SHIFT_MIN;
        }

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            pages = block->max_length >> TARGET_PAGE_BITS;
            /*
             * The initial dirty bitmap for migration must be set with all
             * ones to make sure we'll migrate every guest RAM page to the
             * destination: when restarting migration after a failure,
             * ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] may not cover
             * the whole guest memory.
             */
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
            block->clear_bmap_shift = shift;
            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
        }
    }
}

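/*
 * Pages discarded from the guest (e.g. by a memory device such as
 * virtio-mem) must not be migrated: clear them from each block's dirty
 * bitmap and adjust the dirty-page count accordingly.
 */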
static void migration_bitmap_clear_discarded_pages(RAMState *rs)
{
    unsigned long pages;
    RAMBlock *rb;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
        rs->migration_dirty_pages -= pages;
    }
}

static void ram_init_bitmaps(RAMState *rs)
{
    /* For memory_global_dirty_log_start below */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    WITH_RCU_READ_LOCK_GUARD() {
        ram_list_init_bitmaps();

        if (!migrate_background_snapshot()) {
            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
            migration_bitmap_sync_precopy(rs);
        }
    }
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

    /*
     * After the first bitmap sync, fix up the initial bitmap (which is
     * all 1s) to exclude any discarded pages from migration.
     */
    migration_bitmap_clear_discarded_pages(rs);
}

static int ram_init_all(RAMState **rsp)
{
    if (ram_state_init(rsp)) {
        return -1;
    }

    if (xbzrle_init()) {
        ram_state_cleanup(rsp);
        return -1;
    }

    ram_init_bitmaps(*rsp);

    return 0;
}

static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
{
    RAMBlock *block;
    uint64_t pages = 0;

    /*
     * Postcopy is not using xbzrle/compression, so no need for that.
     * Also, since the source is already halted, we don't need to care
     * about dirty page logging here either.
     */
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        pages += bitmap_count_one(block->bmap,
                                  block->used_length >> TARGET_PAGE_BITS);
    }

    /* This may not be aligned with current bitmaps. Recalculate. */
    rs->migration_dirty_pages = pages;

    ram_state_reset(rs);

    /* Update RAMState cache of output QEMUFile */
    rs->f = out;

    trace_ram_state_resume_prepare(pages);
}

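/*
 * This function clears bits of the free pages reported by the caller
 * from the migration dirty bitmap. @addr is the host address of the
 * start of the contiguous guest free pages, and @len is the total bytes
 * of those pages.
 */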
void qemu_guest_free_page_hint(void *addr, size_t len)
{
    RAMBlock *block;
    ram_addr_t offset;
    size_t used_len, start, npages;
    MigrationState *s = migrate_get_current();

    /* This function is currently expected to be used during live migration */
    if (!migration_is_setup_or_active(s->state)) {
        return;
    }

    for (; len > 0; len -= used_len, addr += used_len) {
        block = qemu_ram_block_from_host(addr, false, &offset);
        if (unlikely(!block || offset >= block->used_length)) {
            /*
             * The implementation might not support RAMBlock resize during
             * live migration, but it could happen in theory with future
             * updates. So we add a check here to capture that case.
             */
            error_report_once("%s unexpected error", __func__);
            return;
        }

        if (len <= block->used_length - offset) {
            used_len = len;
        } else {
            used_len = block->used_length - offset;
        }

        start = offset >> TARGET_PAGE_BITS;
        npages = used_len >> TARGET_PAGE_BITS;

        qemu_mutex_lock(&ram_state->bitmap_mutex);
        /*
         * The skipped free pages are equivalent to having been sent, from
         * clear_bmap's point of view, so clear the bits from the memory
         * region bitmap which are initially set. Otherwise those skipped
         * pages will be sent in the next round after syncing from the
         * memory region bitmap.
         */
        migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
        ram_state->migration_dirty_pages -=
                      bitmap_count_one_with_offset(block->bmap, start, npages);
        bitmap_clear(block->bmap, start, npages);
        qemu_mutex_unlock(&ram_state->bitmap_mutex);
    }
}

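/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */

/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */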
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;
    int ret;

    if (compress_threads_save_setup()) {
        return -1;
    }

    /* migration has already set up the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    WITH_RCU_READ_LOCK_GUARD() {
        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);

        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            qemu_put_byte(f, strlen(block->idstr));
            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
            qemu_put_be64(f, block->used_length);
            if (migrate_postcopy_ram() && block->page_size !=
                                          qemu_host_page_size) {
                qemu_put_be64(f, block->page_size);
            }
            if (migrate_ignore_shared()) {
                qemu_put_be64(f, block->mr->addr);
            }
        }
    }

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    ret = multifd_send_sync_main(f);
    if (ret < 0) {
        return ret;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);

    return 0;
}

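/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */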
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /*
         * Avoid transferring RAM during the bulk phase of block migration:
         * the bulk phase usually takes a long time and transferring RAM
         * updates during that time is pointless.
         */
        goto out;
    }

    /*
     * We'll take this lock a little bit long, but it's okay for two reasons.
     * Firstly, the only possible other thread to take it is who calls
     * qemu_guest_free_page_hint(), which should be rare; secondly, the
     * MAX_WAIT check below guarantees that we'll release it regularly even
     * if the dirty bitmap is heavily changed.
     */
    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        if (ram_list.version != rs->last_version) {
            ram_state_reset(rs);
        }

        /* Read version before ram_list.blocks */
        smp_rmb();

        ram_control_before_iterate(f, RAM_CONTROL_ROUND);

        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        i = 0;
        while ((ret = qemu_file_rate_limit(f)) == 0 ||
               postcopy_has_request(rs)) {
            int pages;

            if (qemu_file_get_error(f)) {
                break;
            }

            pages = ram_find_and_save_block(rs);
            /* no more pages to send */
            if (pages == 0) {
                done = 1;
                break;
            }

            if (pages < 0) {
                qemu_file_set_error(f, pages);
                break;
            }

            rs->target_page_count += pages;

            /*
             * During postcopy, it is necessary to make sure one whole host
             * page is sent in one chunk.
             */
            if (migrate_postcopy_ram()) {
                flush_compressed_data(rs);
            }

            /*
             * We want to check in the 1st loop, just in case it was the 1st
             * time and we had to sync the dirty bitmap.
             * qemu_clock_get_ns() is a bit expensive, so we only check once
             * every few iterations.
             */
            if ((i & 63) == 0) {
                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
                              1000000;
                if (t1 > MAX_WAIT) {
                    trace_ram_save_iterate_big_wait(t1, i);
                    break;
                }
            }
            i++;
        }
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    postcopy_preempt_reset_channel(rs);

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    if (ret >= 0
        && migration_is_setup_or_active(migrate_get_current()->state)) {
        ret = multifd_send_sync_main(rs->f);
        if (ret < 0) {
            return ret;
        }

        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
        ram_transferred_add(8);

        ret = qemu_file_get_error(f);
    }
    if (ret < 0) {
        return ret;
    }

    return done;
}

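/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success or negative on error
 *
 * Called with the iothread lock held
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */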
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;

    rs->last_stage = !migration_in_colo_state();

    WITH_RCU_READ_LOCK_GUARD() {
        if (!migration_in_postcopy()) {
            migration_bitmap_sync_precopy(rs);
        }

        ram_control_before_iterate(f, RAM_CONTROL_FINISH);

        /* try transferring iterative blocks of memory */

        /* flush all remaining blocks regardless of rate limiting */
        while (true) {
            int pages;

            pages = ram_find_and_save_block(rs);
            /* no more blocks to send */
            if (pages == 0) {
                break;
            }
            if (pages < 0) {
                ret = pages;
                break;
            }
        }

        flush_compressed_data(rs);
        ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    }

    if (ret < 0) {
        return ret;
    }

    postcopy_preempt_reset_channel(rs);

    ret = multifd_send_sync_main(rs->f);
    if (ret < 0) {
        return ret;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);

    return 0;
}

static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        WITH_RCU_READ_LOCK_GUARD() {
            migration_bitmap_sync_precopy(rs);
        }
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
        /* We can do postcopy, and all the data is postcopiable */
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}

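/*
 * Load an XBZRLE-encoded page: read the encoding header and payload
 * from the stream and decode it in place on top of the current page
 * contents at @host. Returns 0 on success, -1 on a malformed encoding.
 */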
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    loaded_data = XBZRLE.decoded_buf;
    /* load data and decode */
    /* it can change loaded_data to point to an internal buffer */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}

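/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @mis: the migration incoming state pointer
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to detect the continuation of a previous block)
 * @channel: the channel we're using
 */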
static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
                                              QEMUFile *f, int flags,
                                              int channel)
{
    RAMBlock *block = mis->last_recv_block[channel];
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    if (ramblock_is_ignored(block)) {
        error_report("block %s should not be migrated!", id);
        return NULL;
    }

    mis->last_recv_block[channel] = block;

    return block;
}

static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}

static void *host_page_from_ram_block_offset(RAMBlock *block,
                                             ram_addr_t offset)
{
    /* Note: Explicitly no check against offset_in_ramblock(). */
    return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
                                   block->page_size);
}

static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
                                                         ram_addr_t offset)
{
    return ((uintptr_t)block->host + offset) & (block->page_size - 1);
}

static inline void *colo_cache_from_block_offset(RAMBlock *block,
                                                 ram_addr_t offset,
                                                 bool record_bitmap)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }
    if (!block->colo_cache) {
        error_report("%s: colo_cache is NULL in block :%s",
                     __func__, block->idstr);
        return NULL;
    }

    /*
     * During a COLO checkpoint we need the bitmap of these migrated
     * pages: it helps us decide which pages in the RAM cache should be
     * flushed into the VM's RAM later.
     */
    if (record_bitmap &&
        !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
        ram_state->migration_dirty_pages++;
    }
    return block->colo_cache + offset;
}

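/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */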
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !buffer_is_zero(host, size)) {
        memset(host, ch, size);
    }
}

/* return the size after decompression, or negative value on error */
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    int err;

    err = inflateReset(stream);
    if (err != Z_OK) {
        return -1;
    }

    stream->avail_in = source_len;
    stream->next_in = (uint8_t *)source;
    stream->avail_out = dest_len;
    stream->next_out = dest;

    err = inflate(stream, Z_NO_FLUSH);
    if (err != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}

static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0 && migrate_get_current()->decompress_error_check) {
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static int wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
    return qemu_file_get_error(decomp_file);
}

static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * We use compbuf as an indicator of whether the thread was
         * properly initialised or not.
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}

static int compress_threads_load_setup(QEMUFile *f)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    decomp_file = f;
    for (i = 0; i < thread_count; i++) {
        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
            goto exit;
        }

        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;
exit:
    compress_threads_load_cleanup();
    return -1;
}

static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    QEMU_LOCK_GUARD(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
}

static void colo_init_ram_state(void)
{
    ram_state_init(&ram_state);
}

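/*
 * colo cache: this is for the secondary VM; we cache the whole memory of
 * the secondary VM. The global lock must be held when calling this helper.
 */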
int colo_init_ram_cache(void)
{
    RAMBlock *block;

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                    NULL, false, false);
            if (!block->colo_cache) {
                error_report("%s: Can't alloc memory for COLO cache of "
                             "block %s, size 0x" RAM_ADDR_FMT, __func__,
                             block->idstr, block->used_length);
                RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                    if (block->colo_cache) {
                        qemu_anon_ram_free(block->colo_cache,
                                           block->used_length);
                        block->colo_cache = NULL;
                    }
                }
                return -errno;
            }
            if (!machine_dump_guest_core(current_machine)) {
                qemu_madvise(block->colo_cache, block->used_length,
                             QEMU_MADV_DONTDUMP);
            }
        }
    }

    /*
     * Record the dirty pages that are sent by the primary VM; this dirty
     * bitmap is used later to decide which pages in the cache should be
     * flushed into the secondary VM's RAM.
     */
    if (ram_bytes_total()) {
        RAMBlock *block;

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
            block->bmap = bitmap_new(pages);
        }
    }

    colo_init_ram_state();
    return 0;
}

void colo_incoming_start_dirty_log(void)
{
    RAMBlock *block = NULL;

    /* For memory_global_dirty_log_start below */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
            /* Discard this dirty bitmap record */
            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
        }
        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
    }
    ram_state->migration_dirty_pages = 0;
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}

/* The global lock must be held to call this helper */
void colo_release_ram_cache(void)
{
    RAMBlock *block;

    memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->bmap);
        block->bmap = NULL;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            if (block->colo_cache) {
                qemu_anon_ram_free(block->colo_cache, block->used_length);
                block->colo_cache = NULL;
            }
        }
    }
    ram_state_cleanup(&ram_state);
}

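/**
 * ram_load_setup: Setup RAM for migration incoming side
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 */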
static int ram_load_setup(QEMUFile *f, void *opaque)
{
    if (compress_threads_load_setup(f)) {
        return -1;
    }

    xbzrle_load_setup();
    ramblock_recv_map_init();

    return 0;
}

static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        qemu_ram_block_writeback(rb);
    }

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }

    return 0;
}

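/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-only. postcopy-only means that ram migration will not
 * happen in precopy mode.
 */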
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    return postcopy_ram_incoming_init(mis);
}

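/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to read the data from
 * @channel: the channel to use for loading
 */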
int ram_load_postcopy(QEMUFile *f, int channel)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matches_target_page_size = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[channel];

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;
        int len;

        addr = qemu_get_be64(f);

        /*
         * If qemu file error, we should stop here, and then "addr"
         * may be invalid.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags);
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
            block = ram_block_from_stream(mis, f, flags, channel);
            if (!block) {
                ret = -EINVAL;
                break;
            }

            /*
             * Relying on used_length is racy and can result in false
             * positives. We might place pages beyond used_length in case
             * RAM was shrunk while in postcopy, which is fine - trying to
             * place via UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
             */
            if (!block->host || addr >= block->postcopy_length) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            tmp_page->target_pages++;
            matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in one chunk.
             */
            page_buffer = tmp_page->tmp_huge_page +
                          host_page_offset_from_ram_block_offset(block, addr);
            if (tmp_page->target_pages == 1) {
                tmp_page->host_addr =
                    host_page_from_ram_block_offset(block, addr);
            } else if (tmp_page->host_addr !=
                       host_page_from_ram_block_offset(block, addr)) {
                /* not the 1st target page within the host page */
                error_report("Non-same host page detected on channel %d: "
                             "Target host page %p, received host page %p "
                             "(rb %s offset 0x"RAM_ADDR_FMT" target_pages %d)",
                             channel, tmp_page->host_addr,
                             host_page_from_ram_block_offset(block, addr),
                             block->idstr, addr, tmp_page->target_pages);
                ret = -EINVAL;
                break;
            }

            /*
             * If it's the last part of a host page then we place the host
             * page.
             */
            if (tmp_page->target_pages ==
                (block->page_size / TARGET_PAGE_SIZE)) {
                place_needed = true;
            }
            place_source = tmp_page->tmp_huge_page;
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            /*
             * Can skip writing page_buffer when this is a zero page and
             * (block->page_size == TARGET_PAGE_SIZE).
             */
            if (ch || !matches_target_page_size) {
                memset(page_buffer, ch, TARGET_PAGE_SIZE);
            }
            if (ch) {
                tmp_page->all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            tmp_page->all_zero = false;
            if (!matches_target_page_size) {
                /* For huge pages, we always use temporary buffer */
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * For small pages that match the target page size, we
                 * avoid the qemu_file copy.  Instead we directly use
                 * the buffer of QEMUFile to place the page.  Note: we
                 * cannot do any QEMUFile operation before using that
                 * buffer to make sure the buffer is valid when
                 * placing the page.
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            tmp_page->all_zero = false;
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, page_buffer, len);
            break;

        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            multifd_recv_sync_main();
            break;
        default:
            error_report("Unknown combination of migration flags: 0x%x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

        /* Got the whole host page, wait for decompress before placing. */
        if (place_needed) {
            ret |= wait_for_decompress_done();
        }

        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            if (tmp_page->all_zero) {
                ret = postcopy_place_page_zero(mis, tmp_page->host_addr, block);
            } else {
                ret = postcopy_place_page(mis, tmp_page->host_addr,
                                          place_source, block);
            }
            place_needed = false;
            postcopy_temp_page_reset(tmp_page);
        }
    }

    return ret;
}

static bool postcopy_is_advised(void)
{
    PostcopyState ps = postcopy_state_get();
    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

static bool postcopy_is_running(void)
{
    PostcopyState ps = postcopy_state_get();
    return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
}

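/*
 * Flush content of RAM cache into SVM's memory.
 * Only flush the pages that are dirtied by PVM or SVM or both.
 */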
void colo_flush_ram_cache(void)
{
    RAMBlock *block = NULL;
    void *dst_host;
    void *src_host;
    unsigned long offset = 0;

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(ram_state, block);
        }
    }

    trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
    WITH_RCU_READ_LOCK_GUARD() {
        block = QLIST_FIRST_RCU(&ram_list.blocks);

        while (block) {
            unsigned long num = 0;

            offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
            if (!offset_in_ramblock(block,
                                    ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
                offset = 0;
                num = 0;
                block = QLIST_NEXT_RCU(block, next);
            } else {
                unsigned long i = 0;

                for (i = 0; i < num; i++) {
                    migration_bitmap_clear_dirty(ram_state, block, offset + i);
                }
                dst_host = block->host
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                src_host = block->colo_cache
                         + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
                memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
                offset += num;
            }
        }
    }
    trace_colo_flush_ram_cache_end();
}

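/**
 * ram_load_precopy: load pages in precopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in precopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to read the data from
 */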
static int ram_load_precopy(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();
    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL, *host_bak = NULL;
        uint8_t ch;

        /*
         * Yield periodically to let the main loop run, but an iteration
         * of the main loop is expensive, so do it only every few
         * iterations.
         */
        if ((i & 32767) == 0 && qemu_in_coroutine()) {
            aio_co_schedule(qemu_get_current_aio_context(),
                            qemu_coroutine_self());
            qemu_coroutine_yield();
        }
        i++;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(mis, f, flags,
                                                    RAM_CHANNEL_PRECOPY);

            host = host_from_ram_block_offset(block, addr);
            /*
             * After going into COLO stage, we should not load the page
             * into SVM's memory directly, we put them into colo_cache
             * firstly. NOTE: We need to keep a copy of SVM's ram in
             * colo_cache. Previously, we copied all this memory in the
             * COLO preparation stage, which required stopping the VM and
             * was time-consuming. Here we optimize it by backing up every
             * page during the migration process while COLO is enabled;
             * this affects migration speed a bit, but clearly reduces
             * the downtime of the COLO preparation stage.
             */
            if (migration_incoming_colo_enabled()) {
                if (migration_incoming_in_colo_state()) {
                    /* In COLO stage, put all pages into cache temporarily */
                    host = colo_cache_from_block_offset(block, addr, true);
                } else {
                    /*
                     * In migration stage but before COLO stage,
                     * put all pages into both cache and SVM's memory.
                     */
                    host_bak = colo_cache_from_block_offset(block, addr, false);
                }
            }
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (!migration_incoming_in_colo_state()) {
                ramblock_recv_bitmap_set(block, host);
            }

            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated!", id);
                    ret = -EINVAL;
                } else if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised && migrate_postcopy_ram() &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    if (migrate_ignore_shared()) {
                        hwaddr addr = qemu_get_be64(f);
                        if (ramblock_is_ignored(block) &&
                            block->mr->addr != addr) {
                            error_report("Mismatched GPAs for block %s "
                                         "%" PRId64 " != %" PRId64,
                                         id, (uint64_t)addr,
                                         (uint64_t)block->mr->addr);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            multifd_recv_sync_main();
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: 0x%x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
        if (!ret && host_bak) {
            memcpy(host_bak, host, TARGET_PAGE_SIZE);
        }
    }

    ret |= wait_for_decompress_done();
    return ret;
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int ret = 0;
    static uint64_t seq_iter;
    /*
     * If the system is running in postcopy mode, page inserts to host
     * memory must be atomic.
     */
    bool postcopy_running = postcopy_is_running();

    seq_iter++;

    if (version_id != 4) {
        return -EINVAL;
    }

    /*
     * This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    WITH_RCU_READ_LOCK_GUARD() {
        if (postcopy_running) {
            /*
             * Note!  Here RAM_CHANNEL_PRECOPY is the precopy channel of
             * postcopy migration, we have another RAM_CHANNEL_POSTCOPY to
             * service fast page faults.
             */
            ret = ram_load_postcopy(f, RAM_CHANNEL_PRECOPY);
        } else {
            ret = ram_load_precopy(f);
        }
    }
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}

static bool ram_has_postcopy(void *opaque)
{
    RAMBlock *rb;
    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        if (ramblock_is_pmem(rb)) {
            info_report("Block: %s, host: %p is an nvdimm memory, postcopy "
                        "is not supported now!", rb->idstr, rb->host);
            return false;
        }
    }

    return migrate_postcopy_ram();
}

/* Sync all the dirty bitmaps with the destination VM */
static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
{
    RAMBlock *block;
    QEMUFile *file = s->to_dst_file;
    int ramblock_count = 0;

    trace_ram_dirty_bitmap_sync_start();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        qemu_savevm_send_recv_bitmap(file, block->idstr);
        trace_ram_dirty_bitmap_request(block->idstr);
        ramblock_count++;
    }

    trace_ram_dirty_bitmap_sync_wait();

    /* Wait until all the ramblocks' dirty bitmaps are synced */
    while (ramblock_count--) {
        qemu_sem_wait(&s->rp_state.rp_sem);
    }

    trace_ram_dirty_bitmap_sync_complete();

    return 0;
}

static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}

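/*
 * Read the received bitmap, revert it as the initial dirty bitmap.
 * This is only used when the postcopy migration is paused but wants
 * to resume from a middle point.
 */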
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
{
    int ret = -EINVAL;
    /* from_dst_file is always valid because we're within rp_thread */
    QEMUFile *file = s->rp_state.from_dst_file;
    unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
    uint64_t local_size = DIV_ROUND_UP(nbits, 8);
    uint64_t size, end_mark;

    trace_ram_dirty_bitmap_reload_begin(block->idstr);

    if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: incorrect state %s", __func__,
                     MigrationStatus_str(s->state));
        return -EINVAL;
    }

    /*
     * Note: see comments in ramblock_recv_bitmap_send() on why we
     * need the endianness conversion, and the paddings.
     */
    local_size = ROUND_UP(local_size, 8);

    /* Add paddings */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    size = qemu_get_be64(file);

    /* The size of the bitmap should match with our ramblock */
    if (size != local_size) {
        error_report("%s: ramblock '%s' bitmap size mismatch "
                     "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
                     block->idstr, size, local_size);
        ret = -EINVAL;
        goto out;
    }

    size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
    end_mark = qemu_get_be64(file);

    ret = qemu_file_get_error(file);
    if (ret || size != local_size) {
        error_report("%s: read bitmap failed for ramblock '%s': %d"
                     " (size 0x%"PRIx64", got: 0x%"PRIx64")",
                     __func__, block->idstr, ret, local_size, size);
        ret = -EIO;
        goto out;
    }

    if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                     __func__, block->idstr, end_mark);
        ret = -EINVAL;
        goto out;
    }

    /*
     * Endianness conversion. We are during postcopy (though paused).
     * The dirty bitmap won't change. We can directly modify it.
     */
    bitmap_from_le(block->bmap, le_bitmap, nbits);

    /*
     * What we received is the "received bitmap". Revert it as the
     * initial dirty bitmap for this ramblock.
     */
    bitmap_complement(block->bmap, block->bmap, nbits);

    /* Clear dirty bits of discarded ranges that we don't want to migrate. */
    ramblock_dirty_bitmap_clear_discarded_pages(block);

    /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
    trace_ram_dirty_bitmap_reload_complete(block->idstr);

    /*
     * We succeeded in syncing the bitmap for the current ramblock. If
     * this is the last one to sync, we need to notify the main send
     * thread.
     */
    ram_dirty_bitmap_reload_notify(s);

    ret = 0;
out:
    g_free(le_bitmap);
    return ret;
}

static int ram_resume_prepare(MigrationState *s, void *opaque)
{
    RAMState *rs = *(RAMState **)opaque;
    int ret;

    ret = ram_dirty_bitmap_sync_all(s, rs);
    if (ret) {
        return ret;
    }

    ram_state_resume_prepare(rs, s->to_dst_file);

    return 0;
}

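/* Terminate the postcopy preempt channel by sending an EOS flag and flushing it */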
void postcopy_preempt_shutdown_file(MigrationState *s)
{
    qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
    qemu_fflush(s->postcopy_qemufile_src);
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
    .resume_prepare = ram_resume_prepare,
};

static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
                                      size_t old_size, size_t new_size)
{
    PostcopyState ps = postcopy_state_get();
    ram_addr_t offset;
    RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
    Error *err = NULL;

    if (ramblock_is_ignored(rb)) {
        return;
    }

    if (!migration_is_idle()) {
        /*
         * Precopy code on the source cannot deal with the size of RAM blocks
         * changing at random points in time - especially after sending the
         * RAM block sizes in the migration stream, they must no longer change.
         * Abort and indicate a proper reason.
         */
        error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
        migration_cancel(err);
        error_free(err);
    }

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
        /*
         * Update what ram_postcopy_incoming_init()->init_range() does at the
         * time postcopy was advised. Syncing RAM blocks with the source will
         * result in RAM resizes.
         */
        if (old_size < new_size) {
            if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
                error_report("RAM block '%s' discard of resized RAM failed",
                             rb->idstr);
            }
        }
        rb->postcopy_length = new_size;
        break;
    case POSTCOPY_INCOMING_NONE:
    case POSTCOPY_INCOMING_RUNNING:
    case POSTCOPY_INCOMING_END:
        /*
         * Once our guest is running, postcopy no longer cares about
         * resizes. When growing, the new memory was not available on
         * the source, so no handler is needed.
         */
        break;
    default:
        error_report("RAM block '%s' resized during postcopy state: %d",
                     rb->idstr, ps);
        exit(-1);
    }
}

static RAMBlockNotifier ram_mig_ram_notifier = {
    .ram_block_resized = ram_mig_ram_block_resized,
};

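/*
 * Entry point for RAM migration setup: initialise the XBZRLE lock,
 * register the "ram" savevm handlers (stream version 4) and the RAM
 * block resize notifier.
 */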
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
    ram_block_notifier_add(&ram_mig_ram_notifier);
}