qemu/arch_init.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <stdarg.h>
#include <stdlib.h>
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif
#include "config.h"
#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "hw/acpi/acpi.h"

#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif


#if defined(TARGET_ALPHA)
#define QEMU_ARCH QEMU_ARCH_ALPHA
#elif defined(TARGET_ARM)
#define QEMU_ARCH QEMU_ARCH_ARM
#elif defined(TARGET_CRIS)
#define QEMU_ARCH QEMU_ARCH_CRIS
#elif defined(TARGET_I386)
#define QEMU_ARCH QEMU_ARCH_I386
#elif defined(TARGET_M68K)
#define QEMU_ARCH QEMU_ARCH_M68K
#elif defined(TARGET_LM32)
#define QEMU_ARCH QEMU_ARCH_LM32
#elif defined(TARGET_MICROBLAZE)
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
#define QEMU_ARCH QEMU_ARCH_PPC
#elif defined(TARGET_S390X)
#define QEMU_ARCH QEMU_ARCH_S390X
#elif defined(TARGET_SH4)
#define QEMU_ARCH QEMU_ARCH_SH4
#elif defined(TARGET_SPARC)
#define QEMU_ARCH QEMU_ARCH_SPARC
#elif defined(TARGET_XTENSA)
#define QEMU_ARCH QEMU_ARCH_XTENSA
#elif defined(TARGET_UNICORE32)
#define QEMU_ARCH QEMU_ARCH_UNICORE32
#endif

const uint32_t arch_type = QEMU_ARCH;
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);

/***********************************************************/
/* ram save/restore */
#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
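
/* Each chunk in the RAM stream starts with a be64 header word: the page
 * offset within its RAMBlock, OR'd with the flags above.  Unless
 * RAM_SAVE_FLAG_CONTINUE is set, the header is followed by a one-byte
 * length and the block's idstr, so the destination can resolve the offset;
 * see save_block_hdr() and host_from_stream_offset() below.
 */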


static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
    bool userconfig;
} default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
    { NULL }, /* end of list */
};


int qemu_read_default_config_files(bool userconfig)
{
    int ret;
    struct defconfig_file *f;

    for (f = default_config_files; f->filename; f++) {
        if (!userconfig && f->userconfig) {
            continue;
        }
        ret = qemu_read_config_file(f->filename);
        if (ret < 0 && ret != -ENOENT) {
            return ret;
        }
    }

    return 0;
}

static inline bool is_zero_page(uint8_t *p)
{
    return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
        TARGET_PAGE_SIZE;
}

/* This struct holds the XBZRLE cache and the static buffers
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
    /* Cache for XBZRLE */
    PageCache *cache;
} XBZRLE = {
    .encoded_buf = NULL,
    .current_buf = NULL,
    .decoded_buf = NULL,
    .cache = NULL,
};

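/* Resize the XBZRLE page cache and return the resulting size in bytes.
 * If the cache has not been created yet, nothing is resized; the
 * power-of-two floor of the request is returned, which is presumably
 * what the cache would round the size to once created.
 */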
int64_t xbzrle_cache_resize(int64_t new_size)
{
    if (XBZRLE.cache != NULL) {
        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
            TARGET_PAGE_SIZE;
    }
    return pow2floor(new_size);
}

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    uint64_t xbzrle_overflows;
} AccountingInfo;
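
/* These counters presumably back the MigrationStats reported by
 * query-migrate / "info migrate": dup_pages counts duplicate (zero)
 * pages, skipped_pages counts pages skipped entirely, norm_pages counts
 * full pages sent verbatim, and the xbzrle_* fields hold the XBZRLE
 * cache statistics.
 */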

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                             int cont, int flag)
{
    size_t size;

    qemu_put_be64(f, offset | cont | flag);
    size = 8;

    if (!cont) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr,
                        strlen(block->idstr));
        size += 1 + strlen(block->idstr);
    }
    return size;
}

#define ENCODING_FLAG_XBZRLE 0x1

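/* Try to send a page as an XBZRLE delta against the copy cached by a
 * previous pass.  On the wire the page becomes: block header (as in
 * save_block_hdr()), one ENCODING_FLAG_XBZRLE byte, a be16 encoded
 * length, then the encoded bytes.  Returns the number of bytes sent,
 * 0 if the page was unmodified, or -1 if the caller must fall back to
 * sending the full page (cache miss or encoding overflow).
 */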
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, int cont, bool last_stage)
{
    int encoded_len = 0, bytes_sent = -1;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
        if (!last_stage) {
            cache_insert(XBZRLE.cache, current_addr, current_data);
        }
        acct_info.xbzrle_cache_miss++;
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* Make a stable copy of the page: the guest may keep dirtying it
     * while we encode. */
    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
        return -1;
    }

    /* Update the cache so later passes diff against the data the
     * destination now holds. */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send the XBZRLE-compressed page */
    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_sent += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_sent;

    return bytes_sent;
}


/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
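/* During the bulk stage (the first complete pass over RAM) every page is
 * still dirty, so migration_bitmap_find_and_reset_dirty() can skip the
 * bitmap search and simply advance page by page; see below.
 */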
static bool ram_bulk_stage;

static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
{
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);

    unsigned long next;

    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(migration_bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, migration_bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}

static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
                                              ram_addr_t offset)
{
    bool ret;
    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;

    ret = test_and_set_bit(nr, migration_bitmap);

    if (!ret) {
        migration_dirty_pages++;
    }
    return ret;
}

/* Needs iothread lock! */

static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    ram_addr_t addr;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    static int64_t start_time;
    static int64_t bytes_xfer_prev;
    static int64_t num_dirty_pages_period;
    int64_t end_time;
    int64_t bytes_xfer_now;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_get_clock_ms(rt_clock);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
            if (memory_region_test_and_clear_dirty(block->mr,
                                                   addr, TARGET_PAGE_SIZE,
                                                   DIRTY_MEMORY_MIGRATION)) {
                migration_bitmap_set_dirty(block->mr, addr);
            }
        }
    }
    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_get_clock_ms(rt_clock);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later.  For now:
               check whether the bytes dirtied in the last period exceed half
               of the bytes transferred in that period.  If that happens more
               than N times (for now N == 4), turn on the throttle-down
               logic. */
            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ > 4)) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }
            bytes_xfer_prev = bytes_xfer_now;
        } else {
            mig_throttle_on = false;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
}

/*
 * ram_save_block: Writes a page of memory to the stream f
 *
 * Returns:  The number of bytes written.
 *           0 means no dirty pages
 */

static int ram_save_block(QEMUFile *f, bool last_stage)
{
    RAMBlock *block = last_seen_block;
    ram_addr_t offset = last_offset;
    bool complete_round = false;
    int bytes_sent = 0;
    MemoryRegion *mr;
    ram_addr_t current_addr;

    if (!block) {
        block = QTAILQ_FIRST(&ram_list.blocks);
    }

    while (true) {
        mr = block->mr;
        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
        if (complete_round && block == last_seen_block &&
            offset >= last_offset) {
            break;
        }
        if (offset >= block->length) {
            offset = 0;
            block = QTAILQ_NEXT(block, next);
            if (!block) {
                block = QTAILQ_FIRST(&ram_list.blocks);
                complete_round = true;
                ram_bulk_stage = false;
            }
        } else {
            int ret;
            uint8_t *p;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

            p = memory_region_get_ram_ptr(mr) + offset;

            /* When in doubt, send the page as a normal page */
            bytes_sent = -1;
            ret = ram_control_save_page(f, block->offset,
                               offset, TARGET_PAGE_SIZE, &bytes_sent);

            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
                        acct_info.norm_pages++;
                    } else if (bytes_sent == 0) {
                        acct_info.dup_pages++;
                    }
                }
            } else if (is_zero_page(p)) {
                acct_info.dup_pages++;
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
                    p = get_cached_data(XBZRLE.cache, current_addr);
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_PAGE);
                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }

            /* if page is unmodified, continue to the next */
            if (bytes_sent > 0) {
                last_sent_block = block;
                break;
            }
        }
    }
    last_seen_block = block;
    last_offset = offset;

    return bytes_sent;
}

static uint64_t bytes_transferred;

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        total += block->length;
    }

    return total;
}

static void migration_end(void)
{
    if (migration_bitmap) {
        memory_global_dirty_log_stop();
        g_free(migration_bitmap);
        migration_bitmap = NULL;
    }

    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.decoded_buf);
        XBZRLE.cache = NULL;
    }
}

static void ram_migration_cancel(void *opaque)
{
    migration_end();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

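/* The setup stage writes the total RAM size (tagged with
 * RAM_SAVE_FLAG_MEM_SIZE) followed by an idstr/length record for every
 * RAMBlock, so the destination can validate its block list in ram_load()
 * before any page data arrives.
 */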
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    migration_bitmap = bitmap_new(ram_pages);
    bitmap_set(migration_bitmap, 0, ram_pages);
    migration_dirty_pages = ram_pages;
    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;

    if (migrate_use_xbzrle()) {
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            DPRINTF("Error creating cache\n");
            return -1;
        }
        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
        acct_clear();
    }

    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    bytes_transferred = 0;
    reset_ram_globals();

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->length);
    }

    qemu_mutex_unlock_ramlist();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int total_sent = 0;

    qemu_mutex_lock_ramlist();

    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_get_clock_ns(rt_clock);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, false);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        total_sent += bytes_sent;
        acct_info.iterations++;
        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check once
           every few iterations.
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }

    qemu_mutex_unlock_ramlist();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    if (ret < 0) {
        bytes_transferred += total_sent;
        return ret;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    total_sent += 8;
    bytes_transferred += total_sent;

    return total_sent;
}

static int ram_save_complete(QEMUFile *f, void *opaque)
{
    qemu_mutex_lock_ramlist();
    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, true);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        bytes_transferred += bytes_sent;
    }

    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    qemu_mutex_unlock_ramlist();
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        migration_bitmap_sync();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    int ret, rc = 0;
    unsigned int xh_len;
    int xh_flags;

    if (!XBZRLE.decoded_buf) {
        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);

    /* decode RLE */
    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
                               TARGET_PAGE_SIZE);
    if (ret == -1) {
        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
        rc = -1;
    } else if (ret > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
                ret, TARGET_PAGE_SIZE);
        abort();
    }

    return rc;
}

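/* Map a (block, offset) pair from the stream to a host pointer.  The
 * block is remembered in a static variable across calls, so chunks
 * flagged RAM_SAVE_FLAG_CONTINUE can omit the idstr and reuse the block
 * resolved by the previous chunk.
 */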
static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            fprintf(stderr, "Ack, bad migration stream!\n");
            return NULL;
        }

        return memory_region_get_ram_ptr(block->mr) + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (!strncmp(id, block->idstr, sizeof(id))) {
            return memory_region_get_ram_ptr(block->mr) + offset;
        }
    }

    fprintf(stderr, "Can't find block %s!\n", id);
    return NULL;
}

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_page(host)) {
        memset(host, ch, size);
#ifndef _WIN32
        if (ch == 0 &&
            (!kvm_enabled() || kvm_has_sync_mmu()) &&
            getpagesize() <= TARGET_PAGE_SIZE) {
            qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
        }
#endif
    }
}

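/* Incoming side: repeatedly read a be64 header word, split it into page
 * address and flags, and dispatch on the flag bits until
 * RAM_SAVE_FLAG_EOS marks the end of this section of the stream.
 */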
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    ram_addr_t addr;
    int flags, ret = 0;
    int error;
    static uint64_t seq_iter;

    seq_iter++;

    if (version_id != 4) {
        return -EINVAL;
    }

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
            /* Synchronize RAM block list */
            char id[256];
            ram_addr_t length;
            ram_addr_t total_ram_bytes = addr;

            while (total_ram_bytes) {
                RAMBlock *block;
                uint8_t len;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                    if (!strncmp(id, block->idstr, sizeof(id))) {
                        if (block->length != length) {
                            fprintf(stderr,
                                    "Length mismatch: %s: " RAM_ADDR_FMT
                                    " in != " RAM_ADDR_FMT "\n", id, length,
                                    block->length);
                            ret = -EINVAL;
                            goto done;
                        }
                        break;
                    }
                }

                if (!block) {
                    fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                            "accept migration\n", id);
                    ret = -EINVAL;
                    goto done;
                }

                total_ram_bytes -= length;
            }
        }

        if (flags & RAM_SAVE_FLAG_COMPRESS) {
            void *host;
            uint8_t ch;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
            void *host;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
            void *host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            if (load_xbzrle(f, addr, host) < 0) {
                ret = -EINVAL;
                goto done;
            }
        } else if (flags & RAM_SAVE_FLAG_HOOK) {
            ram_control_load_hook(f, flags);
        }
        error = qemu_file_get_error(f);
        if (error) {
            ret = error;
            goto done;
        }
    } while (!(flags & RAM_SAVE_FLAG_EOS));

done:
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}

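/* Live migration phases map onto these handlers: save_live_setup runs
 * once to start dirty logging and describe the RAM blocks,
 * save_live_iterate is called repeatedly while the guest runs,
 * save_live_pending tells the migration core how much dirty RAM remains,
 * and save_live_complete drains the rest with the guest stopped.
 */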
SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cancel = ram_migration_cancel,
};

struct soundhw {
    const char *name;
    const char *descr;
    int enabled;
    int isa;
    union {
        int (*init_isa) (ISABus *bus);
        int (*init_pci) (PCIBus *bus);
    } init;
};

static struct soundhw soundhw[9];
static int soundhw_count;

void isa_register_soundhw(const char *name, const char *descr,
                          int (*init_isa)(ISABus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 1;
    soundhw[soundhw_count].init.init_isa = init_isa;
    soundhw_count++;
}

void pci_register_soundhw(const char *name, const char *descr,
                          int (*init_pci)(PCIBus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 0;
    soundhw[soundhw_count].init.init_pci = init_pci;
    soundhw_count++;
}
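
/* A card's init code is expected to register itself at startup, for
 * example (illustrative only; the exact symbol names are an assumption
 * about a typical PCI audio device, not taken from this file):
 *
 *     pci_register_soundhw("ac97", "Intel 82801AA AC97 Audio", ac97_init);
 *
 * select_soundhw() below then matches -soundhw arguments against the
 * registered names.
 */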

void select_soundhw(const char *optarg)
{
    struct soundhw *c;

    if (is_help_option(optarg)) {
    show_valid_cards:

        if (soundhw_count) {
            printf("Valid sound card names (comma separated):\n");
            for (c = soundhw; c->name; ++c) {
                printf("%-11s %s\n", c->name, c->descr);
            }
            printf("\n-soundhw all will enable all of the above\n");
        } else {
            printf("Machine has no user-selectable audio hardware "
                   "(it may or may not have always-present audio hardware).\n");
        }
        exit(!is_help_option(optarg));
    } else {
        size_t l;
        const char *p;
        char *e;
        int bad_card = 0;

        if (!strcmp(optarg, "all")) {
            for (c = soundhw; c->name; ++c) {
                c->enabled = 1;
            }
            return;
        }

        p = optarg;
        while (*p) {
            e = strchr(p, ',');
            l = !e ? strlen(p) : (size_t) (e - p);

            for (c = soundhw; c->name; ++c) {
                if (!strncmp(c->name, p, l) && !c->name[l]) {
                    c->enabled = 1;
                    break;
                }
            }

            if (!c->name) {
                if (l > 80) {
                    fprintf(stderr,
                            "Unknown sound card name (too big to show)\n");
                } else {
                    fprintf(stderr, "Unknown sound card name `%.*s'\n",
                            (int) l, p);
                }
                bad_card = 1;
            }
            p += l + (e != NULL);
        }

        if (bad_card) {
            goto show_valid_cards;
        }
    }
}

void audio_init(void)
{
    struct soundhw *c;
    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS,
                                                          NULL);
    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS,
                                                          NULL);

    for (c = soundhw; c->name; ++c) {
        if (c->enabled) {
            if (c->isa) {
                if (!isa_bus) {
                    fprintf(stderr, "ISA bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_isa(isa_bus);
            } else {
                if (!pci_bus) {
                    fprintf(stderr, "PCI bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_pci(pci_bus);
            }
        }
    }
}

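/* Parse the canonical 36-character textual UUID form into 16 binary
 * bytes via the UUID_FMT scanf format (sixteen two-digit hex fields).
 * On x86 the parsed UUID is also exposed to the guest through the
 * SMBIOS type 1 table.
 */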
int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
    int ret;

    if (strlen(str) != 36) {
        return -1;
    }

    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
                 &uuid[15]);

    if (ret != 16) {
        return -1;
    }
#ifdef TARGET_I386
    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16);
#endif
    return 0;
}

void do_acpitable_option(const QemuOpts *opts)
{
#ifdef TARGET_I386
    Error *err = NULL;

    acpi_table_add(opts, &err);
    if (err) {
        fprintf(stderr, "Wrong ACPI table provided: %s\n",
                error_get_pretty(err));
        error_free(err);
        exit(1);
    }
#endif
}

void do_smbios_option(const char *optarg)
{
#ifdef TARGET_I386
    if (smbios_entry_add(optarg) < 0) {
        exit(1);
    }
#endif
}

void cpudef_init(void)
{
#if defined(cpudef_setup)
    cpudef_setup(); /* parse cpu definitions in target config file */
#endif
}

int tcg_available(void)
{
    return 1;
}

int kvm_available(void)
{
#ifdef CONFIG_KVM
    return 1;
#else
    return 0;
#endif
}

int xen_available(void)
{
#ifdef CONFIG_XEN
    return 1;
#else
    return 0;
#endif
}


TargetInfo *qmp_query_target(Error **errp)
{
    TargetInfo *info = g_malloc0(sizeof(*info));

    info->arch = g_strdup(TARGET_NAME);

    return info;
}

/* Stub function that gets run on the vcpu when it is brought out of the
   VM to run inside qemu via async_run_on_cpu() */
static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(30*1000);
    qemu_mutex_lock_iothread();
}

/* To reduce the dirty rate, explicitly prevent the VCPUs from spending
   much time in the VM.  The migration thread will try to catch up.
   The workload will experience a performance drop.
*/
static void mig_throttle_cpu_down(CPUState *cpu, void *data)
{
    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}

static void mig_throttle_guest_down(void)
{
    qemu_mutex_lock_iothread();
    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
    qemu_mutex_unlock_iothread();
}
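
/* With the 30 ms sleep in mig_sleep_cpu() and the 40 ms minimum interval
 * below, a throttled vCPU sleeps roughly 30 ms out of every ~70 ms, i.e.
 * it loses on the order of 40% of its CPU time while throttling is on.
 */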

static void check_guest_throttling(void)
{
    static int64_t t0;
    int64_t t1;

    if (!mig_throttle_on) {
        return;
    }

    if (!t0) {
        t0 = qemu_get_clock_ns(rt_clock);
        return;
    }

    t1 = qemu_get_clock_ns(rt_clock);

    /* If it has been more than 40 ms since the last time the guest
     * was throttled then do it again.
     */
    if (40 < (t1 - t0) / 1000000) {
        mig_throttle_guest_down();
        t0 = t1;
    }
}