1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28#include "qemu/osdep.h"
29#include "qemu-common.h"
30#include "cpu.h"
31#include <zlib.h>
32#include "qapi-event.h"
33#include "qemu/cutils.h"
34#include "qemu/bitops.h"
35#include "qemu/bitmap.h"
36#include "qemu/timer.h"
37#include "qemu/main-loop.h"
38#include "migration/migration.h"
39#include "migration/postcopy-ram.h"
40#include "exec/address-spaces.h"
41#include "migration/page_cache.h"
42#include "qemu/error-report.h"
43#include "trace.h"
44#include "exec/ram_addr.h"
45#include "qemu/rcu_queue.h"
46
/* Debug printf for this file; compiles to nothing unless
 * DEBUG_MIGRATION_RAM is defined at build time. */
#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
54
/* Counts consecutive bitmap-sync periods in which the guest dirtied
 * memory faster than we transferred it; used by migration_bitmap_sync()
 * to decide when to throttle the guest down (auto-converge). */
static int dirty_rate_high_cnt;

/* Number of dirty-bitmap synchronisations so far; also serves as the
 * generation number for XBZRLE page-cache entries. */
static uint64_t bitmap_sync_count;
58
59
60
61
/* Flags carried in the low bits of the page offset on the wire; the
 * offset itself is target-page aligned so these bits are otherwise free. */
#define RAM_SAVE_FLAG_FULL 0x01
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE 0x08
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE 0x40
/* 0x80 is reserved here (used elsewhere as RAM_SAVE_FLAG_HOOK) */
#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100

/* Page of all zeros, used as the reference content when a zero page is
 * inserted into the XBZRLE cache (see xbzrle_cache_zero_page()). */
static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
73
74static inline bool is_zero_range(uint8_t *p, uint64_t size)
75{
76 return buffer_find_nonzero_offset(p, size) == size;
77}
78
79
80
/* State for XBZRLE delta-compression of repeatedly dirtied pages. */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;

    /* buffer holding a copy of the page being encoded */
    uint8_t *current_buf;

    /* Cache of previously-sent page contents; protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding on the destination side */
static uint8_t *xbzrle_decoded_buf;
93
94static void XBZRLE_cache_lock(void)
95{
96 if (migrate_use_xbzrle())
97 qemu_mutex_lock(&XBZRLE.lock);
98}
99
100static void XBZRLE_cache_unlock(void)
101{
102 if (migrate_use_xbzrle())
103 qemu_mutex_unlock(&XBZRLE.lock);
104}
105
106
107
108
109
110
111
/**
 * xbzrle_cache_resize: resize the XBZRLE page cache.
 *
 * Returns the new cache size (rounded down to a power of two) on
 * success, or -1 if @new_size is smaller than one target page or the
 * new cache could not be allocated.  If the rounded size equals the
 * current size the cache is left untouched.
 *
 * @new_size: requested cache size in bytes
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        /* Same effective size: nothing to do. */
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        /* Old cache contents are discarded, not migrated over. */
        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
145
146
/* Accounting of what has been sent during this migration. */
typedef struct AccountingInfo {
    uint64_t dup_pages;             /* zero ("duplicate") pages sent */
    uint64_t skipped_pages;         /* pages skipped entirely */
    uint64_t norm_pages;            /* full (normal) pages sent */
    uint64_t iterations;            /* save iterations performed */
    uint64_t xbzrle_bytes;          /* bytes sent as XBZRLE deltas */
    uint64_t xbzrle_pages;          /* pages sent as XBZRLE deltas */
    uint64_t xbzrle_cache_miss;     /* pages not found in the cache */
    double xbzrle_cache_miss_rate;  /* misses per iteration, last period */
    uint64_t xbzrle_overflows;      /* deltas longer than a raw page */
} AccountingInfo;

static AccountingInfo acct_info;
160
161static void acct_clear(void)
162{
163 memset(&acct_info, 0, sizeof(acct_info));
164}
165
/* Bytes accounted for zero ("duplicate") pages. */
uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

/* Number of zero ("duplicate") pages sent. */
uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

/* Bytes accounted for skipped pages. */
uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

/* Number of pages skipped. */
uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

/* Bytes accounted for normal (full) pages. */
uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

/* Number of normal (full) pages sent. */
uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

/* Bytes sent as XBZRLE deltas (headers included). */
uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

/* Number of pages sent as XBZRLE deltas. */
uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

/* Number of XBZRLE cache misses. */
uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

/* XBZRLE cache-miss rate over the last measurement period. */
double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

/* Number of XBZRLE encodings that overflowed a raw page. */
uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}
220
221
222
/* Block where the dirty-page search last stopped (see
 * ram_find_and_save_block / find_dirty_block). */
static RAMBlock *last_seen_block;
/* Last block from which a page was actually put on the wire; used to
 * decide whether RAM_SAVE_FLAG_CONTINUE can omit the block id. */
static RAMBlock *last_sent_block;
/* Offset within last_seen_block where the search last stopped. */
static ram_addr_t last_offset;
/* Serialises writers of migration_bitmap_rcu against bitmap syncing. */
static QemuMutex migration_bitmap_mutex;
/* Count of dirty pages still pending transfer. */
static uint64_t migration_dirty_pages;
/* ram_list.version we last saw; presumably used to detect hotplug
 * elsewhere in this file - TODO confirm, usage not visible here. */
static uint32_t last_version;
/* True during the first full pass over RAM, when every page is sent. */
static bool ram_bulk_stage;
231
232
/* State of the ongoing search for a dirty page to send. */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock *block;
    /* Current offset to search from, within the block */
    ram_addr_t offset;
    /* Set once we wrap around the block list back to the start */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
242
/* Migration bitmaps, replaced wholesale under RCU when RAM is resized
 * (see migration_bitmap_extend) and reclaimed via call_rcu. */
static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap: one bit per target page, set = dirty */
    unsigned long *bmap;
    /* Bitmap of pages not yet sent; only allocated for postcopy
     * (may be NULL - see migration_bitmap_extend / users below). */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;
254
/* Per-thread state for a multi-threaded compression worker. */
struct CompressParam {
    bool done;          /* thread is idle; set under comp_done_lock */
    bool quit;          /* ask the thread to exit; set under mutex */
    QEMUFile *file;     /* private buffer the thread compresses into */
    QemuMutex mutex;    /* protects block/offset/quit handoff */
    QemuCond cond;      /* signalled when new work (or quit) arrives */
    RAMBlock *block;    /* non-NULL = work pending; cleared by worker */
    ram_addr_t offset;  /* offset of the page to compress */
};
typedef struct CompressParam CompressParam;

/* Per-thread state for a multi-threaded decompression worker. */
struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;          /* destination for the decompressed page */
    uint8_t *compbuf;   /* buffer holding the compressed input */
    int len;            /* length of the compressed input */
};
typedef struct DecompressParam DecompressParam;
276
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;

/* The empty QEMUFileOps make comp_param[i].file a pure in-memory buffer. */
static const QEMUFileOps empty_ops = { };

/* When false, compression is bypassed even if enabled (e.g. once
 * XBZRLE takes over after the first pass - see find_dirty_block). */
static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);
296
297static void *do_data_compress(void *opaque)
298{
299 CompressParam *param = opaque;
300 RAMBlock *block;
301 ram_addr_t offset;
302
303 qemu_mutex_lock(¶m->mutex);
304 while (!param->quit) {
305 if (param->block) {
306 block = param->block;
307 offset = param->offset;
308 param->block = NULL;
309 qemu_mutex_unlock(¶m->mutex);
310
311 do_compress_ram_page(param->file, block, offset);
312
313 qemu_mutex_lock(&comp_done_lock);
314 param->done = true;
315 qemu_cond_signal(&comp_done_cond);
316 qemu_mutex_unlock(&comp_done_lock);
317
318 qemu_mutex_lock(¶m->mutex);
319 } else {
320 qemu_cond_wait(¶m->cond, ¶m->mutex);
321 }
322 }
323 qemu_mutex_unlock(¶m->mutex);
324
325 return NULL;
326}
327
328static inline void terminate_compression_threads(void)
329{
330 int idx, thread_count;
331
332 thread_count = migrate_compress_threads();
333 for (idx = 0; idx < thread_count; idx++) {
334 qemu_mutex_lock(&comp_param[idx].mutex);
335 comp_param[idx].quit = true;
336 qemu_cond_signal(&comp_param[idx].cond);
337 qemu_mutex_unlock(&comp_param[idx].mutex);
338 }
339}
340
/*
 * Stop and reap all compression threads, then free their state.
 *
 * No-op when compression is disabled.  Must only be called once no more
 * pages will be queued for compression: it sets every thread's quit
 * flag, joins the threads, and destroys the per-thread files/locks and
 * the shared completion lock/cond.
 */
void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
363
/*
 * Allocate and start the multi-threaded compression workers.
 *
 * No-op when compression is disabled.  Each worker gets a private
 * buffer-only QEMUFile (empty_ops) to compress into, starts idle
 * (done = true), and runs do_data_compress until asked to quit.
 */
void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * so set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
391
392
393
394
395
396
397
398
399
400
401
402
403
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 * (RAM_SAVE_FLAG_CONTINUE in @offset means the block is unchanged and
 * the id can be omitted).
 *
 * Returns: number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page,
 *          carrying RAM_SAVE_FLAG_* in the lower bits
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}
419
420
421
422
423
424
425
426static void mig_throttle_guest_down(void)
427{
428 MigrationState *s = migrate_get_current();
429 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
430 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
431
432
433 if (!cpu_throttle_active()) {
434 cpu_throttle_set(pct_initial);
435 } else {
436
437 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
438 }
439}
440
441
442
443
444
445
446
/**
 * xbzrle_cache_zero_page: insert a zero page into the XBZRLE cache
 *
 * Keeps the cache coherent when a previously-cached page becomes zero
 * and is sent via the zero-page path instead of XBZRLE.
 *
 * @current_addr: absolute ram address of the zero page
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page, as long
     * as it updated an old one - the return value is ignored on purpose. */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}
458
/* Sub-format marker written after the page header for XBZRLE pages. */
#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send the current page via XBZRLE
 *
 * Returns: 1  - page was written as an XBZRLE delta
 *          0  - page is identical to the cached copy, nothing sent
 *          -1 - cache miss or delta overflow; caller must send the page
 *               some other way
 *
 * @f: QEMUFile where to send the data
 * @current_data: in/out pointer to the page contents; may be redirected
 *                to the cached copy so the caller sends stable data
 *                while the guest keeps running (only when !last_stage)
 * @current_addr: absolute ram address of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: true if we are at the completion stage
 * @bytes_transferred: incremented by the number of bytes written
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* Update *current_data to point at the freshly cached
                 * copy, so the caller sends data that cannot change
                 * under its feet. */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* Snapshot the live page: the guest may modify it while we encode. */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding decision */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* Delta would be larger than a raw page: refresh the cache and
         * let the caller send the page uncompressed. */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* Keep the cache in sync with what we are about to send. */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}
539
540
541
542
543
544
545
546
547
/*
 * Find the next dirty page in block @rb at or after byte offset @start.
 *
 * Returns the byte offset of that page within the block (a value
 * >= rb->used_length means none was found); *ram_addr_abs receives the
 * page's absolute ram address.
 *
 * NOTE(review): during the bulk stage (and past the block's first page)
 * this returns start's page + 1 instead of searching the bitmap - it
 * appears to rely on the caller scanning sequentially while every page
 * is still dirty; confirm against find_dirty_block/ram_save_host_page.
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}
571
/*
 * Clear the migration-bitmap dirty bit for the page at absolute ram
 * address @addr.
 *
 * Returns true if the bit was previously set; in that case the global
 * pending dirty-page count is decremented as well.
 */
static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}
585
/* Pull dirty information for [start, start + length) from the memory
 * API's dirty log into the migration bitmap, adding any newly dirty
 * pages to the pending count. */
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}
593
594
/* State for the once-per-second dirty-rate measurement done by
 * migration_bitmap_sync(). */
static int64_t start_time;              /* period start (ms, realtime) */
static int64_t bytes_xfer_prev;         /* bytes transferred at period start */
static int64_t num_dirty_pages_period;  /* pages dirtied this period */
static uint64_t xbzrle_cache_miss_prev; /* cache misses at last period end */
static uint64_t iterations_prev;        /* iterations at last period end */

/* Reset the dirty-rate measurement state for a fresh migration. */
static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}
609
/*
 * Synchronise the migration bitmap with the memory API's dirty log for
 * every RAM block, then - once per second - update dirty-rate stats,
 * possibly throttle the guest (auto-converge), refresh the XBZRLE
 * cache-miss rate, and emit a MIGRATION_PASS event if enabled.
 */
static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    qemu_mutex_lock(&migration_bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->offset, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&migration_bitmap_mutex);

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* Only recompute rates (and consider throttling) once per second. */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* If the guest dirtied more than half of what we managed to
             * transfer in the same period, twice in a row, start (or
             * increase) CPU throttling. */
            bytes_xfer_now = ram_bytes_transferred();

            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                 (bytes_xfer_now - bytes_xfer_prev)/2) &&
                (dirty_rate_high_cnt++ >= 2)) {
                trace_migration_throttle();
                dirty_rate_high_cnt = 0;
                mig_throttle_guest_down();
            }
            bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                    (double)(acct_info.xbzrle_cache_miss -
                             xbzrle_cache_miss_prev) /
                    (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = bitmap_sync_count;
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(bitmap_sync_count, NULL);
    }
}
686
687
688
689
690
691
692
693
694
695
696
697
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns: number of pages written (1 if the page was all zeros,
 *          -1 if it was not and nothing was sent)
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page contents
 * @bytes_transferred: incremented by the number of bytes written
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        /* Wire format: header followed by the single fill byte (0). */
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
/**
 * ram_save_page: send the given page to the stream
 *
 * Returns: number of pages written;
 *          < 0  - error
 *          >= 0 - number of pages written; this may legally be 0 if
 *                 XBZRLE found the page unchanged.
 *
 * Tries, in order: the RDMA/control hook, the zero-page path, XBZRLE,
 * and finally a raw page.
 *
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: true if we are at the completion stage
 * @bytes_transferred: incremented by the number of bytes written
 */
static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* Give the transport hook (e.g. RDMA) first refusal on the page. */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let XBZRLE know, otherwise a previously cached copy
             * of this (now zeroed) page would be stale.
             */
            xbzrle_cache_zero_page(current_addr);
        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache
                 * page might get updated before it gets to the wire.
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow/miss or normal page: send it raw. */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}
804
/*
 * Compress one page and write header + compressed data into @f (a
 * compression thread's private buffer file).
 *
 * Returns the number of bytes written, or 0 on compression failure (in
 * which case the migration stream is marked with the error).
 */
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        /* Fail the real migration stream, not the private buffer file. */
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
    }

    return bytes_sent;
}
825
/* Total bytes written out for RAM migration so far. */
static uint64_t bytes_transferred;
827
/*
 * Wait for every compression thread to finish its current page, then
 * drain each thread's buffered output into @f and account the bytes.
 * No-op when compression is disabled.
 */
static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    /* First wait until every worker is idle (done set under this lock). */
    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    /* Then copy each private buffer into the real stream. */
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
854
855static inline void set_compress_params(CompressParam *param, RAMBlock *block,
856 ram_addr_t offset)
857{
858 param->block = block;
859 param->offset = offset;
860}
861
/*
 * Hand the page at (@block, @offset) to an idle compression thread,
 * first draining that thread's previous output into @f.  Blocks on
 * comp_done_cond until some thread becomes idle.
 *
 * Returns 1 (number of pages queued for writing).
 */
static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                /* done==true means the worker is idle, so its buffer
                 * file is safe to drain before handing over new work. */
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
895
896
897
898
899
900
901
902
903
904
905
906
/**
 * ram_save_compressed_page: send the given page using compression
 *
 * Returns: number of pages written (-1 on compression error with the
 *          stream marked failed).
 *
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: true if we are at the completion stage
 * @bytes_transferred: incremented by the number of bytes written
 */
static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
                                    bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* Give the transport hook (e.g. RDMA) first refusal on the page. */
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting a new block, all pages of the previous block
         * must hit the wire first (workers buffer privately), and the
         * new block's first page must go out before its later pages so
         * the block id written with it arrives first.  Hence: flush the
         * workers and compress the first page inline; later pages of
         * the same block can use the worker threads.
         */
        if (block != last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                /* Not a zero page: compress it inline. */
                bytes_xmit = save_page_header(f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    *bytes_transferred += bytes_xmit + blen;
                    acct_info.norm_pages++;
                    pages = 1;
                } else {
                    qemu_file_set_error(f, blen);
                    error_report("compressed data failed!");
                }
            }
        } else {
            offset |= RAM_SAVE_FLAG_CONTINUE;
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            }
        }
    }

    return pages;
}
971
972
973
974
975
976
977
978
979
980
981
982
983
/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns: true if a dirty page is found at pss->block/pss->offset
 *
 * @f: current migration stream
 * @pss: data about the state of the current dirty page scan
 * @again: set to false only once the whole of RAM has been scanned
 * @ram_addr_abs: receives the dirty page's address in the global
 *                ram_addr space
 */
static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
                             bool *again, ram_addr_t *ram_addr_abs)
{
    pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
                                              ram_addr_abs);
    if (pss->complete_round && pss->block == last_seen_block &&
        pss->offset >= last_offset) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (pss->offset >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->offset = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list; wrap back to the start. */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at
                 * this point; flush what the workers still hold.
                 */
                flush_compressed_data(f);
                compression_switch = false;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Found something */
        *again = true;
        return true;
    }
}
1026
1027
1028
1029
1030
1031
1032
1033
1034
/**
 * unqueue_page: pop one target page's worth of request off the
 * postcopy source-page request queue.
 *
 * Helper for get_queued_page.  A request longer than one page is
 * shrunk in place and stays at the head of the queue.
 *
 * Returns: the RAMBlock of the request, or NULL if the queue is empty
 *
 * @ms: MigrationState owning the queue
 * @offset: receives the offset within the block
 * @ram_addr_abs: receives the page's global ram_addr (page aligned)
 */
static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
                              ram_addr_t *ram_addr_abs)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&ms->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
        struct MigrationSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&ms->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;
        *ram_addr_abs = (entry->offset + entry->rb->offset) &
                        TARGET_PAGE_MASK;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            /* Request fully consumed: drop the ref taken when queued. */
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&ms->src_page_req_mutex);

    return block;
}
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
/**
 * get_queued_page: redirect the page search to the next queued
 * (postcopy-requested) page that is still dirty, if any.
 *
 * Returns: true if pss was updated to point at a queued page
 *
 * @ms: MigrationState owning the request queue
 * @pss: updated with the block/offset of the queued page
 * @ram_addr_abs: receives the page's global ram_addr
 */
static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
                            ram_addr_t *ram_addr_abs)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(ms, &offset, ram_addr_abs);
        /* Skip requests whose page is no longer dirty (already sent
         * since the request was queued); only trace them. */
        if (block) {
            unsigned long *bitmap;
            bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
            dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(
                    block->idstr, (uint64_t)offset,
                    (uint64_t)*ram_addr_abs,
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                         atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
            } else {
                trace_get_queued_page(block->idstr,
                                      (uint64_t)offset,
                                      (uint64_t)*ram_addr_abs);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * Once we serve pages out of order the bulk-stage assumption
         * (everything after the search point is dirty) no longer
         * holds, so drop out of bulk stage.
         */
        ram_bulk_stage = false;
        /*
         * Continue the background search from the queued page: the
         * destination is likely to ask for nearby pages next.
         */
        pss->block = block;
        pss->offset = offset;
    }

    return !!block;
}
1127
1128
1129
1130
1131
1132
1133
1134
/**
 * flush_page_queue: discard any remaining entries on the postcopy
 * source-page request queue.
 *
 * The queue should normally be empty by the time this is called, but a
 * failed migration may leave droppings in it.
 *
 * @ms: MigrationState owning the queue
 */
void flush_page_queue(MigrationState *ms)
{
    struct MigrationSrcPageRequest *mspr, *next_mspr;
    /* NOTE(review): iterates without taking src_page_req_mutex -
     * presumably only safe once no other thread can queue requests;
     * confirm against callers. */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}
1149
1150
1151
1152
1153
1154
1155
1156
1157
/**
 * ram_save_queue_pages: queue a range of pages for transmission,
 * e.g. on request from the postcopy destination.
 *
 * Returns 0 on success, -1 on error (unknown block, no previous block
 * to reuse, or range past the end of the block).
 *
 * @ms: MigrationState owning the queue
 * @rbname: name of the RAMBlock; NULL means "same as the last request"
 * @start: starting address relative to the start of the RAMBlock
 * @len: length (in bytes) to send
 */
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                         ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;

    ms->postcopy_requests++;
    rcu_read_lock();
    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = ms->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        ms->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start+len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct MigrationSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct MigrationSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    /* Pin the block so it cannot vanish while the request is queued;
     * dropped again in unqueue_page()/flush_page_queue(). */
    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&ms->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&ms->src_page_req_mutex);
    rcu_read_unlock();

    return 0;

err:
    rcu_read_unlock();
    return -1;
}
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
/**
 * ram_save_target_page: save one target-sized page
 *
 * Returns: number of pages written (0 if the page was not dirty),
 *          or < 0 on error.
 *
 * @ms: current migration state
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: true if we are at the completion stage
 * @bytes_transferred: incremented by the number of bytes written
 * @dirty_ram_abs: address of the page in the global ram_addr space
 */
static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
                                PageSearchStatus *pss,
                                bool last_stage,
                                uint64_t *bytes_transferred,
                                ram_addr_t dirty_ram_abs)
{
    int res = 0;

    /* Check the page is dirty and if it is, send it */
    if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
        unsigned long *unsentmap;
        if (compression_switch && migrate_use_compression()) {
            res = ram_save_compressed_page(f, pss,
                                           last_stage,
                                           bytes_transferred);
        } else {
            res = ram_save_page(f, pss, last_stage,
                                bytes_transferred);
        }

        if (res < 0) {
            return res;
        }
        unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
        if (unsentmap) {
            clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
        }
        /* Only update last_sent_block if a page was actually put on
         * the wire (res > 0); XBZRLE may legitimately send nothing.
         */
        if (res > 0) {
            last_sent_block = pss->block;
        }
    }

    return res;
}
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
/**
 * ram_save_host_page: save a whole host page's worth of target pages
 *
 * Starting at pss->offset, sends target pages up to the end of the
 * current host page (host pages may be larger than target pages).
 * It is valid for the initial offset to point into the middle of a
 * host page, in which case only the remainder of it is covered.
 *
 * Returns: number of pages written, or < 0 on error
 *
 * @ms: current migration state
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: true if we are at the completion stage
 * @bytes_transferred: incremented by the number of bytes written
 * @dirty_ram_abs: address of the first page in the global ram_addr space
 */
static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
                              PageSearchStatus *pss,
                              bool last_stage,
                              uint64_t *bytes_transferred,
                              ram_addr_t dirty_ram_abs)
{
    int tmppages, pages = 0;
    do {
        tmppages = ram_save_target_page(ms, f, pss, last_stage,
                                        bytes_transferred, dirty_ram_abs);
        if (tmppages < 0) {
            return tmppages;
        }

        pages += tmppages;
        pss->offset += TARGET_PAGE_SIZE;
        dirty_ram_abs += TARGET_PAGE_SIZE;
    } while (pss->offset & (qemu_host_page_size - 1));

    /* The offset we leave with is the last one we looked at */
    pss->offset -= TARGET_PAGE_SIZE;
    return pages;
}
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
/**
 * ram_find_and_save_block: find a dirty page and send it to @f
 *
 * Called within an RCU critical section.  Queued (postcopy-requested)
 * pages take priority over the background linear scan.
 *
 * Returns: number of pages written (0 means no dirty pages found),
 *          or < 0 on error
 *
 * @f: QEMUFile where to send the data
 * @last_stage: true if we are at the completion stage
 * @bytes_transferred: incremented by the number of bytes written
 */
static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    PageSearchStatus pss;
    MigrationState *ms = migrate_get_current();
    int pages = 0;
    bool again, found;
    ram_addr_t dirty_ram_abs; /* address (ram_addr space) of the found page */

    /* Resume the scan where the previous call left off. */
    pss.block = last_seen_block;
    pss.offset = last_offset;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(ms, &pss, &dirty_ram_abs);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
        }

        if (found) {
            pages = ram_save_host_page(ms, f, &pss,
                                       last_stage, bytes_transferred,
                                       dirty_ram_abs);
        }
    } while (!pages && again);

    last_seen_block = pss.block;
    last_offset = pss.offset;

    return pages;
}
1362
1363void acct_update_position(QEMUFile *f, size_t size, bool zero)
1364{
1365 uint64_t pages = size / TARGET_PAGE_SIZE;
1366 if (zero) {
1367 acct_info.dup_pages += pages;
1368 } else {
1369 acct_info.norm_pages += pages;
1370 bytes_transferred += size;
1371 qemu_update_position(f, size);
1372 }
1373}
1374
/* Number of dirty pages still pending transfer. */
static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

/* Bytes of RAM still pending transfer. */
uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

/* Total bytes written out for RAM migration so far. */
uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}
1389
1390uint64_t ram_bytes_total(void)
1391{
1392 RAMBlock *block;
1393 uint64_t total = 0;
1394
1395 rcu_read_lock();
1396 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1397 total += block->used_length;
1398 rcu_read_unlock();
1399 return total;
1400}
1401
/* Release the destination-side XBZRLE decode buffer (g_free(NULL) is
 * a no-op, so this is safe to call repeatedly). */
void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}
1407
/* RCU reclaim callback: free a retired migration bitmap pair. */
static void migration_bitmap_free(struct BitmapRcu *bmap)
{
    g_free(bmap->bmap);
    g_free(bmap->unsentmap);
    g_free(bmap);
}
1414
/* Tear down migration state: retire the migration bitmap via RCU,
 * stop dirty logging, and free the XBZRLE cache and buffers. */
static void ram_migration_cleanup(void *opaque)
{
    /* caller must hold the iothread lock or run in a BH, so there is
     * no writer racing against migration_bitmap_rcu here.
     */
    struct BitmapRcu *bitmap = migration_bitmap_rcu;
    atomic_rcu_set(&migration_bitmap_rcu, NULL);
    if (bitmap) {
        memory_global_dirty_log_stop();
        /* Readers may still hold the old pointer; defer the free. */
        call_rcu(bitmap, migration_bitmap_free, rcu);
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}
1438
1439static void reset_ram_globals(void)
1440{
1441 last_seen_block = NULL;
1442 last_sent_block = NULL;
1443 last_offset = 0;
1444 last_version = ram_list.version;
1445 ram_bulk_stage = true;
1446}
1447
/* Milliseconds; presumably bounds time spent per save iteration by code
 * later in this file - TODO confirm, usage not visible in this chunk. */
#define MAX_WAIT 50

/*
 * Grow the migration bitmap after a RAM resize from @old to @new pages.
 * A new bitmap is built, the old contents copied, the new tail marked
 * dirty, and the pair swapped in under RCU; the old one is reclaimed
 * via call_rcu once readers are gone.
 */
void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
{
    /* Called only when a migration bitmap exists, i.e. a migration is
     * in progress; otherwise there is nothing to extend.
     */
    if (migration_bitmap_rcu) {
        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
        bitmap = g_new(struct BitmapRcu, 1);
        bitmap->bmap = bitmap_new(new);

        /* Take the mutex so migration_bitmap_sync cannot write into the
         * old bitmap while we copy it.
         */
        qemu_mutex_lock(&migration_bitmap_mutex);
        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
        /* All newly appearing RAM must be sent, so mark it dirty. */
        bitmap_set(bitmap->bmap, old, new - old);

        /* NOTE(review): the unsentmap is not carried over - presumably
         * extending it safely under RCU isn't possible here; confirm
         * how postcopy handles a NULL unsentmap after a resize. */
        bitmap->unsentmap = NULL;

        atomic_rcu_set(&migration_bitmap_rcu, bitmap);
        qemu_mutex_unlock(&migration_bitmap_mutex);
        migration_dirty_pages += new - old;
        call_rcu(old_bitmap, migration_bitmap_free, rcu);
    }
}
1481
1482
1483
1484
1485
1486
/*
 * Dump a bitmap to stderr for debugging, 128 pages per line.
 *
 * @expected is the value the bitmap is mostly expected to hold; lines
 * that are entirely @expected are not printed.  If @todump is NULL the
 * migration bitmap itself is dumped.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
{
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];  /* one line of bits plus the terminating NUL */

    if (!todump) {
        todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    }

    for (cur = 0; cur < ram_pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line: shorten it so we don't read (or print) bits past
         * the end of the bitmap.
         */
        if (cur + linelen > ram_pages) {
            linelen = ram_pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530static int postcopy_send_discard_bm_ram(MigrationState *ms,
1531 PostcopyDiscardState *pds,
1532 unsigned long start,
1533 unsigned long length)
1534{
1535 unsigned long end = start + length;
1536 unsigned long current;
1537 unsigned long *unsentmap;
1538
1539 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1540 for (current = start; current < end; ) {
1541 unsigned long one = find_next_bit(unsentmap, end, current);
1542
1543 if (one <= end) {
1544 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1545 unsigned long discard_length;
1546
1547 if (zero >= end) {
1548 discard_length = end - one;
1549 } else {
1550 discard_length = zero - one;
1551 }
1552 if (discard_length) {
1553 postcopy_discard_send_range(ms, pds, one, discard_length);
1554 }
1555 current = one + discard_length;
1556 } else {
1557 current = one;
1558 }
1559 }
1560
1561 return 0;
1562}
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572static int postcopy_each_ram_send_discard(MigrationState *ms)
1573{
1574 struct RAMBlock *block;
1575 int ret;
1576
1577 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1578 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1579 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1580 first,
1581 block->idstr);
1582
1583
1584
1585
1586
1587
1588 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1589 block->used_length >> TARGET_PAGE_BITS);
1590 postcopy_discard_send_finish(ms, pds);
1591 if (ret) {
1592 return ret;
1593 }
1594 }
1595
1596 return 0;
1597}
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
/*
 * Helper for postcopy_chunk_hostpages(): one pass over one RAMBlock.
 *
 * When the host page size is larger than the target page size, a host
 * page must be handled as a unit.  This pass finds runs of target pages
 * (runs of *sent* pages when 'unsent_pass' is true, runs of *dirty*
 * pages otherwise) whose edges do not fall on host-page boundaries and
 * "fixes up" each straddled host page: the destination is told to
 * discard the whole host page and every target page inside it is marked
 * both unsent and dirty so it will be resent in full.
 *
 * @ms: current migration state
 * @unsent_pass: true to scan the unsentmap, false to scan the dirty bitmap
 * @block: the RAMBlock being processed
 * @pds: discard state used to transmit the discard ranges
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    unsigned long *bitmap;
    unsigned long *unsentmap;
    /* Number of target pages per host page. */
    unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
    unsigned long first = block->offset >> TARGET_PAGE_BITS;
    unsigned long len = block->used_length >> TARGET_PAGE_BITS;
    unsigned long last = first + (len - 1);
    unsigned long run_start;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;

    if (unsent_pass) {
        /* Start of a run of sent pages (zero bits in the unsentmap). */
        run_start = find_next_zero_bit(unsentmap, last + 1, first);
    } else {
        /* Start of a run of dirty pages. */
        run_start = find_next_bit(bitmap, last + 1, first);
    }

    while (run_start <= last) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * If the run starts in the middle of a host page, that host page
         * needs fixing up.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* Resume scanning after the fixed-up host page. */
            run_start = run_start + host_ratio;
        } else {
            /* Run starts host-page aligned; find where it ends. */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
            }
            /*
             * If the run ends in the middle of a host page, that host
             * page needs fixing up too.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /*
                 * This host page is handled now; the next iteration
                 * starts after the fixup.
                 */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * Nothing to fix on this run; continue from the next
                 * sent/dirty page.
                 */
                run_start = run_end + 1;
            }
        }

        if (do_fixup) {
            unsigned long page;

            /* Tell the destination to discard this host page ... */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /*
                 * ... but in the dirty (!unsent) pass, skip pages already
                 * marked unsent: any partially sent host pages were
                 * discarded by the preceding unsent pass.
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Update the bitmaps for every target page in the host page. */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page now count as unsent. */
                set_bit(page, unsentmap);

                /*
                 * Re-mark them dirty, bumping the global count only for
                 * pages that were not dirty before.
                 */
                migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        if (unsent_pass) {
            /* Find the start of the next run of sent pages. */
            run_start = find_next_zero_bit(unsentmap, last + 1,
                                           run_start);
        } else {
            /* Find the start of the next run of dirty pages. */
            run_start = find_next_bit(bitmap, last + 1, run_start);
        }
    }
}
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730static int postcopy_chunk_hostpages(MigrationState *ms)
1731{
1732 struct RAMBlock *block;
1733
1734 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1735
1736 return 0;
1737 }
1738
1739
1740 last_seen_block = NULL;
1741 last_sent_block = NULL;
1742 last_offset = 0;
1743
1744 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1745 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1746
1747 PostcopyDiscardState *pds =
1748 postcopy_discard_send_init(ms, first, block->idstr);
1749
1750
1751 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1752
1753
1754
1755
1756 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1757
1758 postcopy_discard_send_finish(ms, pds);
1759 }
1760
1761 return 0;
1762}
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1774{
1775 int ret;
1776 unsigned long *bitmap, *unsentmap;
1777
1778 rcu_read_lock();
1779
1780
1781 migration_bitmap_sync();
1782
1783 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1784 if (!unsentmap) {
1785
1786
1787
1788
1789 error_report("migration ram resized during precopy phase");
1790 rcu_read_unlock();
1791 return -EINVAL;
1792 }
1793
1794
1795 ret = postcopy_chunk_hostpages(ms);
1796 if (ret) {
1797 rcu_read_unlock();
1798 return ret;
1799 }
1800
1801
1802
1803
1804 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1805 bitmap_or(unsentmap, unsentmap, bitmap,
1806 last_ram_offset() >> TARGET_PAGE_BITS);
1807
1808
1809 trace_ram_postcopy_send_discard_bitmap();
1810#ifdef DEBUG_POSTCOPY
1811 ram_debug_dump_bitmap(unsentmap, true);
1812#endif
1813
1814 ret = postcopy_each_ram_send_discard(ms);
1815 rcu_read_unlock();
1816
1817 return ret;
1818}
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828int ram_discard_range(MigrationIncomingState *mis,
1829 const char *block_name,
1830 uint64_t start, size_t length)
1831{
1832 int ret = -1;
1833
1834 rcu_read_lock();
1835 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1836
1837 if (!rb) {
1838 error_report("ram_discard_range: Failed to find block '%s'",
1839 block_name);
1840 goto err;
1841 }
1842
1843 uint8_t *host_startaddr = rb->host + start;
1844
1845 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1846 error_report("ram_discard_range: Unaligned start address: %p",
1847 host_startaddr);
1848 goto err;
1849 }
1850
1851 if ((start + length) <= rb->used_length) {
1852 uint8_t *host_endaddr = host_startaddr + length;
1853 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1854 error_report("ram_discard_range: Unaligned end address: %p",
1855 host_endaddr);
1856 goto err;
1857 }
1858 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1859 } else {
1860 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
1861 "/%zx/" RAM_ADDR_FMT")",
1862 block_name, start, length, rb->used_length);
1863 }
1864
1865err:
1866 rcu_read_unlock();
1867
1868 return ret;
1869}
1870
1871
1872
1873
1874
1875
1876
1877
/*
 * Live-migration "setup" stage for RAM: initialise the XBZRLE cache (if
 * enabled), allocate and fill the dirty bitmap, start dirty-memory
 * logging, and send the table of RAMBlock names and sizes.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_bitmap_pages;   /* size of the dirty bitmap, in pages */

    dirty_rate_high_cnt = 0;
    bitmap_sync_count = 0;
    migration_bitmap_sync_init();
    qemu_mutex_init(&migration_bitmap_mutex);

    if (migrate_use_xbzrle()) {
        /* The cache is shared state; take the XBZRLE lock around init. */
        XBZRLE_cache_lock();
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            XBZRLE_cache_unlock();
            error_report("Error creating cache");
            return -1;
        }
        XBZRLE_cache_unlock();

        /* g_try_* is used so an OOM fails the migration, not QEMU. */
        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
        if (!XBZRLE.encoded_buf) {
            error_report("Error allocating encoded_buf");
            return -1;
        }

        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!XBZRLE.current_buf) {
            error_report("Error allocating current_buf");
            g_free(XBZRLE.encoded_buf);
            XBZRLE.encoded_buf = NULL;
            return -1;
        }

        acct_clear();
    }

    /* Take iothread + ramlist locks while setting up the global state. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();
    bytes_transferred = 0;
    reset_ram_globals();

    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
    /* Start with every page dirty so the whole of RAM gets sent. */
    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);

    if (migrate_postcopy_ram()) {
        /* Nothing has been sent yet, so every page starts "unsent". */
        migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
        bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
    }

    /*
     * Count the total number of pages used by ram blocks; this is used
     * as the initial remaining-dirty-pages estimate.
     */
    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

    /* Header: total RAM size with the MEM_SIZE flag in the low bits. */
    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    /* Then one (name length, name, used length) record per block. */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
    }

    rcu_read_unlock();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
1964
/*
 * Live-migration "iterate" stage for RAM: send dirty pages until the
 * bandwidth limit is hit, no dirty pages remain, or more than MAX_WAIT
 * milliseconds have been spent in this invocation.
 *
 * Returns the number of pages sent, or a negative stream error.
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int pages_sent = 0;

    rcu_read_lock();
    /* The RAM block list changed under us; restart the page walk. */
    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    /* Pair with the version check: read list updates before walking it. */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(f, false, &bytes_transferred);

        /* No more dirty pages to send in this round. */
        if (pages == 0) {
            break;
        }
        pages_sent += pages;
        acct_info.iterations++;

        /*
         * qemu_clock_get_ns() is relatively expensive, so only check the
         * elapsed time every 64 iterations (including the very first one,
         * which may have been slowed by a bitmap sync).
         */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }
    flush_compressed_data(f);
    rcu_read_unlock();

    /*
     * Run the after-iterate hooks before writing the EOS marker so any
     * buffered control data precedes it in the stream.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    bytes_transferred += 8;   /* account for the 8-byte EOS marker */

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return pages_sent;
}
2029
2030
2031static int ram_save_complete(QEMUFile *f, void *opaque)
2032{
2033 rcu_read_lock();
2034
2035 if (!migration_in_postcopy(migrate_get_current())) {
2036 migration_bitmap_sync();
2037 }
2038
2039 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2040
2041
2042
2043
2044 while (true) {
2045 int pages;
2046
2047 pages = ram_find_and_save_block(f, true, &bytes_transferred);
2048
2049 if (pages == 0) {
2050 break;
2051 }
2052 }
2053
2054 flush_compressed_data(f);
2055 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2056
2057 rcu_read_unlock();
2058
2059 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2060
2061 return 0;
2062}
2063
2064static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2065 uint64_t *non_postcopiable_pending,
2066 uint64_t *postcopiable_pending)
2067{
2068 uint64_t remaining_size;
2069
2070 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2071
2072 if (!migration_in_postcopy(migrate_get_current()) &&
2073 remaining_size < max_size) {
2074 qemu_mutex_lock_iothread();
2075 rcu_read_lock();
2076 migration_bitmap_sync();
2077 rcu_read_unlock();
2078 qemu_mutex_unlock_iothread();
2079 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2080 }
2081
2082
2083 *postcopiable_pending += remaining_size;
2084}
2085
2086static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2087{
2088 unsigned int xh_len;
2089 int xh_flags;
2090 uint8_t *loaded_data;
2091
2092 if (!xbzrle_decoded_buf) {
2093 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2094 }
2095 loaded_data = xbzrle_decoded_buf;
2096
2097
2098 xh_flags = qemu_get_byte(f);
2099 xh_len = qemu_get_be16(f);
2100
2101 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2102 error_report("Failed to load XBZRLE page - wrong compression!");
2103 return -1;
2104 }
2105
2106 if (xh_len > TARGET_PAGE_SIZE) {
2107 error_report("Failed to load XBZRLE page - len overflow!");
2108 return -1;
2109 }
2110
2111 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2112
2113
2114 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2115 TARGET_PAGE_SIZE) == -1) {
2116 error_report("Failed to load XBZRLE page - decode error!");
2117 return -1;
2118 }
2119
2120 return 0;
2121}
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2133 int flags)
2134{
2135 static RAMBlock *block = NULL;
2136 char id[256];
2137 uint8_t len;
2138
2139 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2140 if (!block) {
2141 error_report("Ack, bad migration stream!");
2142 return NULL;
2143 }
2144 return block;
2145 }
2146
2147 len = qemu_get_byte(f);
2148 qemu_get_buffer(f, (uint8_t *)id, len);
2149 id[len] = 0;
2150
2151 block = qemu_ram_block_by_name(id);
2152 if (!block) {
2153 error_report("Can't find block %s", id);
2154 return NULL;
2155 }
2156
2157 return block;
2158}
2159
2160static inline void *host_from_ram_block_offset(RAMBlock *block,
2161 ram_addr_t offset)
2162{
2163 if (!offset_in_ramblock(block, offset)) {
2164 return NULL;
2165 }
2166
2167 return block->host + offset;
2168}
2169
2170
2171
2172
2173
/*
 * Fill a page with the byte 'ch', skipping the memset when the fill
 * byte is zero and the page is already all-zero.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        return;
    }
    memset(host, ch, size);
}
2180
/*
 * Body of one decompression worker thread: wait for a page queued by
 * decompress_data_with_multi_threads(), inflate it directly into guest
 * memory, mark itself done, and loop until asked to quit.
 */
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            /* Take the queued request; drop the mutex while inflating. */
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /*
             * NOTE(review): the return value of uncompress() is ignored,
             * so a corrupt compressed page is silently accepted here —
             * consider checking for Z_OK and propagating failure.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            /* Tell the dispatcher this worker is free again. */
            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            /* No work queued: sleep until signalled. */
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
2219
2220static void wait_for_decompress_done(void)
2221{
2222 int idx, thread_count;
2223
2224 if (!migrate_use_compression()) {
2225 return;
2226 }
2227
2228 thread_count = migrate_decompress_threads();
2229 qemu_mutex_lock(&decomp_done_lock);
2230 for (idx = 0; idx < thread_count; idx++) {
2231 while (!decomp_param[idx].done) {
2232 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2233 }
2234 }
2235 qemu_mutex_unlock(&decomp_done_lock);
2236}
2237
2238void migrate_decompress_threads_create(void)
2239{
2240 int i, thread_count;
2241
2242 thread_count = migrate_decompress_threads();
2243 decompress_threads = g_new0(QemuThread, thread_count);
2244 decomp_param = g_new0(DecompressParam, thread_count);
2245 qemu_mutex_init(&decomp_done_lock);
2246 qemu_cond_init(&decomp_done_cond);
2247 for (i = 0; i < thread_count; i++) {
2248 qemu_mutex_init(&decomp_param[i].mutex);
2249 qemu_cond_init(&decomp_param[i].cond);
2250 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2251 decomp_param[i].done = true;
2252 decomp_param[i].quit = false;
2253 qemu_thread_create(decompress_threads + i, "decompress",
2254 do_data_decompress, decomp_param + i,
2255 QEMU_THREAD_JOINABLE);
2256 }
2257}
2258
2259void migrate_decompress_threads_join(void)
2260{
2261 int i, thread_count;
2262
2263 thread_count = migrate_decompress_threads();
2264 for (i = 0; i < thread_count; i++) {
2265 qemu_mutex_lock(&decomp_param[i].mutex);
2266 decomp_param[i].quit = true;
2267 qemu_cond_signal(&decomp_param[i].cond);
2268 qemu_mutex_unlock(&decomp_param[i].mutex);
2269 }
2270 for (i = 0; i < thread_count; i++) {
2271 qemu_thread_join(decompress_threads + i);
2272 qemu_mutex_destroy(&decomp_param[i].mutex);
2273 qemu_cond_destroy(&decomp_param[i].cond);
2274 g_free(decomp_param[i].compbuf);
2275 }
2276 g_free(decompress_threads);
2277 g_free(decomp_param);
2278 decompress_threads = NULL;
2279 decomp_param = NULL;
2280}
2281
/*
 * Hand one compressed page to an idle decompression worker, blocking
 * under decomp_done_lock until a worker becomes available.
 *
 * @f: migration stream positioned at the compressed payload
 * @host: destination guest page
 * @len: length of the compressed payload in bytes
 */
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        /* Look for a worker whose previous page has completed. */
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                /* Wake the worker; it reads des/len under its mutex. */
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            /* Page dispatched to worker 'idx'. */
            break;
        } else {
            /* All workers busy: wait for one to signal completion. */
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
2310
2311
2312
2313
2314
2315int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2316{
2317 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2318
2319 return postcopy_ram_incoming_init(mis, ram_pages);
2320}
2321
2322
2323
2324
2325
/*
 * Incoming side of postcopy: read target pages from the stream and
 * atomically "place" them into guest memory one host page at a time
 * (a host page may consist of several target pages when
 * qemu_host_page_size > TARGET_PAGE_SIZE).
 *
 * Returns 0 on success or a negative error.
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary host page the data is staged in before being placed. */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        uint8_t ch;

        /* Each record starts with the page address ORed with its flags. */
        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }

            /*
             * Stage the target page at its offset within the temporary
             * host page; the whole host page is placed atomically later.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & ~qemu_host_page_mask);
            if (!((uintptr_t)host & ~qemu_host_page_mask)) {
                /* First target page of a host page: reset the zero flag. */
                all_zero = true;
            } else {
                /*
                 * Not the first target page within the host page: the
                 * source must send the components of a host page in
                 * sequential order.
                 */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * Place the host page once its last target page has been
             * read.
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            ~qemu_host_page_mask) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_COMPRESS:
            /* One fill byte repeated across the whole target page. */
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /*
                 * When page sizes match and this read completes the host
                 * page, read in place to avoid one copy.
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* Normal end of section. */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* 'host' is the LAST target page; rewind to the host page. */
            if (all_zero) {
                ret = postcopy_place_page_zero(mis,
                                               host + TARGET_PAGE_SIZE -
                                               qemu_host_page_size);
            } else {
                ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
                                               qemu_host_page_size,
                                          place_source);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
2443
/*
 * Incoming side of the "ram" section.  Parses page records from the
 * stream until an EOS flag or an error; once the postcopy listen phase
 * has started, the whole stream is delegated to ram_load_postcopy().
 *
 * Returns 0 on success or a negative error.
 */
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;   /* counts invocations, for debug output */
    int len = 0;

    /*
     * True once the destination has entered (or passed) the postcopy
     * listen state.
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;

    seq_iter++;

    /* Only stream version 4 is understood (see ram_mig_init()). */
    if (version_id != 4) {
        ret = -EINVAL;
    }

    /*
     * Hold the RCU read lock so the RAMBlock list stays stable while we
     * resolve block names and host addresses below.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        /* Each record starts with the page address ORed with its flags. */
        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /*
             * Block table sent by ram_save_setup(): the total size,
             * then one (name length, name, used length) record per
             * block.  Match each against our own blocks.
             */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        /* Try to match the source's block size. */
                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            /* A page filled entirely with the single byte 'ch'. */
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            /* A full, uncompressed page. */
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            /* A zlib-compressed page: a length, then the payload. */
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* Normal end of section. */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    /* All compressed pages must be in place before we return. */
    wait_for_decompress_done();
    rcu_read_unlock();
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}
2580
/* Live-migration callbacks for the "ram" section. */
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    /* The same completion routine serves both precopy and postcopy. */
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};
2590
/* Register RAM as a live-migratable section; called once at startup. */
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    /* Stream version 4 — must match the check in ram_load(). */
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
}
2596