qemu/migration/migration.h
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#ifndef QEMU_MIGRATION_H
#define QEMU_MIGRATION_H

#include "exec/cpu-common.h"
#include "hw/qdev-core.h"
#include "qapi/qapi-types-migration.h"
#include "qemu/thread.h"
#include "qemu/coroutine_int.h"
#include "io/channel.h"
#include "io/channel-buffer.h"
#include "net/announce.h"
#include "qom/object.h"

struct PostcopyBlocktimeContext;

#define  MIGRATION_RESUME_ACK_VALUE  (1)

/*
 * 1<<6=64 pages -> 256K chunk when page size is 4K.  This keeps every
 * chunk aligned to 64 pages, so the clear bitmaps are always aligned
 * to an unsigned long.
 */
#define CLEAR_BITMAP_SHIFT_MIN             6
/*
 * 1<<18=256K pages -> 1G chunk when page size is 4K.  This is the
 * default value used when none is specified.
 */
#define CLEAR_BITMAP_SHIFT_DEFAULT        18
/*
 * 1<<31=2G pages -> 8T chunk when page size is 4K.  This should be
 * big enough that we will not overflow easily.
 */
#define CLEAR_BITMAP_SHIFT_MAX            31
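
/*
 * Illustrative sketch, not part of the original header: how a shift
 * value maps to a chunk size in bytes for a given page size, e.g.
 * shift=18 with 4K pages gives 1G chunks.  The helper name is a
 * hypothetical addition, and it assumes the usual qemu/osdep.h
 * environment for the fixed-width integer types.
 */
static inline uint64_t clear_bitmap_chunk_bytes(size_t page_size,
                                                uint8_t shift)
{
    /* e.g. 4096 << 18 == 1G */
    return (uint64_t)page_size << shift;
}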

/* State for the incoming migration */
struct MigrationIncomingState {
    QEMUFile *from_src_file;

    /*
     * Cleared at the start of the main state load; set when the main
     * thread finishes loading state.
     */
    QemuEvent main_thread_load_event;

    /* For network announces */
    AnnounceTimer  announce_timer;

    size_t         largest_page_size;
    bool           have_fault_thread;
    QemuThread     fault_thread;
    QemuSemaphore  fault_thread_sem;
    /* Set this when we want the fault thread to quit */
    bool           fault_thread_quit;

    bool           have_listen_thread;
    QemuThread     listen_thread;
    QemuSemaphore  listen_thread_sem;

    /* For the kernel to send us notifications */
    int       userfault_fd;
    /* To notify the fault thread to wake, e.g., when it needs to quit */
    int       userfault_event_fd;
    QEMUFile *to_src_file;
    QemuMutex rp_mutex;    /* We send replies from multiple threads */
    /* RAMBlock of the last request sent to the source */
    RAMBlock *last_rb;
    void     *postcopy_tmp_page;
    void     *postcopy_tmp_zero_page;
    /* PostCopyFDs for external userfaultfds & handlers of shared memory */
    GArray   *postcopy_remote_fds;

    QEMUBH *bh;

    int state;

    bool have_colo_incoming_thread;
    QemuThread colo_incoming_thread;
    /* The coroutine we should enter (back) after failover */
    Coroutine *migration_incoming_co;
    QemuSemaphore colo_incoming_sem;

    /*
     * PostcopyBlocktimeContext to keep information for postcopy
     * live migration, to calculate vCPU block time
     */
    struct PostcopyBlocktimeContext *blocktime_ctx;

    /* notify PAUSED postcopy incoming migrations to try to continue */
    bool postcopy_recover_triggered;
    QemuSemaphore postcopy_pause_sem_dst;
    QemuSemaphore postcopy_pause_sem_fault;

    /* List of listening socket addresses */
    SocketAddressList *socket_address_list;

    /* A tree of pages that we requested from the source VM */
    GTree *page_requested;
    /* For debugging purposes only, but worth keeping */
    int page_requested_count;
    /*
     * The mutex helps to maintain the requested pages that we sent to the
     * source, IOW, to guarantee coherence between the page_requested tree
     * and the per-ramblock receivedmap.  Note! This does not guarantee
     * consistency of the real page copy procedures (using
     * UFFDIO_[ZERO]COPY).  E.g., even if one bit in receivedmap is
     * cleared, UFFDIO_COPY could have happened for that page already.
     * This is intentional so that the mutex does not serialize, and is
     * not blocked by, slow operations like the UFFDIO_* ioctls.  It
     * should still be enough to make sure the page_requested tree always
     * contains valid information.
     */
    QemuMutex page_request_mutex;
};
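
/*
 * Illustrative sketch, not part of the original header: the locking
 * pattern described for page_request_mutex above.  The helper name and
 * the use of the host address as both key and value in the GTree are
 * assumptions for illustration only.
 */
static inline void
migration_page_request_record(MigrationIncomingState *mis, void *host_addr)
{
    qemu_mutex_lock(&mis->page_request_mutex);
    /*
     * Update the tree and the counter under the lock so they stay
     * coherent with the per-ramblock receivedmap updates done elsewhere.
     */
    g_tree_insert(mis->page_requested, host_addr, host_addr);
    mis->page_requested_count++;
    qemu_mutex_unlock(&mis->page_request_mutex);
}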

MigrationIncomingState *migration_incoming_get_current(void);
void migration_incoming_state_destroy(void);
/*
 * Functions to work with blocktime context
 */
void fill_destination_postcopy_migration_info(MigrationInfo *info);

#define TYPE_MIGRATION "migration"

typedef struct MigrationClass MigrationClass;
DECLARE_OBJ_CHECKERS(MigrationState, MigrationClass,
                     MIGRATION_OBJ, TYPE_MIGRATION)

struct MigrationClass {
    /*< private >*/
    DeviceClass parent_class;
};

struct MigrationState {
    /*< private >*/
    DeviceState parent_obj;

    /*< public >*/
    QemuThread thread;
    QEMUBH *vm_start_bh;
    QEMUBH *cleanup_bh;
    QEMUFile *to_dst_file;
    QIOChannelBuffer *bioc;
    /*
     * Protects the to_dst_file pointer.  We need to make sure we won't
     * yield or hang during the critical section, since this lock will
     * be used in the OOB command handler.
     */
    QemuMutex qemu_file_lock;

    /*
     * Used to allow urgent requests to override rate limiting.
     */
    QemuSemaphore rate_limit_sem;

    /* pages already sent at the beginning of the current iteration */
    uint64_t iteration_initial_pages;

    /* pages transferred per second */
    double pages_per_second;

    /* bytes already sent at the beginning of the current iteration */
    uint64_t iteration_initial_bytes;
    /* time at the start of the current iteration */
    int64_t iteration_start_time;
    /*
     * The final stage happens when the remaining data is smaller than
     * this threshold; it's calculated from the requested downtime and
     * measured bandwidth.
     */
    int64_t threshold_size;

    /* params from 'migrate-set-parameters' */
    MigrationParameters parameters;

    int state;

    /* State related to the return path */
    struct {
        QEMUFile     *from_dst_file;
        QemuThread    rp_thread;
        bool          error;
        QemuSemaphore rp_sem;
    } rp_state;

    double mbps;
    /* Timestamp when the most recent migration started (ms) */
    int64_t start_time;
    /* Total time used by the latest migration (ms) */
    int64_t total_time;
    /* Timestamp (ms) when the VM was stopped to migrate the final state */
    int64_t downtime_start;
    int64_t downtime;
    int64_t expected_downtime;
    bool enabled_capabilities[MIGRATION_CAPABILITY__MAX];
    int64_t setup_time;
    /*
     * Whether the guest was running when we entered the completion
     * stage.  If the migration is interrupted for any reason, we need
     * to continue running the guest on the source.
     */
    bool vm_was_running;

    /* Flag set once the migration has been asked to enter postcopy */
    bool start_postcopy;
    /* Flag set after postcopy has sent the device state */
    bool postcopy_after_devices;

    /* Flag set once the migration thread is running (and needs joining) */
    bool migration_thread_running;

    /* Flag set once the migration thread called bdrv_inactivate_all */
    bool block_inactive;

    /* Migration is waiting for the guest to unplug a device */
    QemuSemaphore wait_unplug_sem;

    /* Migration is paused due to pause-before-switchover */
    QemuSemaphore pause_sem;

    /* The semaphore is used to notify the COLO thread that failover is
       finished */
    QemuSemaphore colo_exit_sem;

    /* The event is used to notify the COLO thread to take a checkpoint */
    QemuEvent colo_checkpoint_event;
    int64_t colo_checkpoint_time;
    QEMUTimer *colo_delay_timer;

    /*
     * The first error that has occurred.  We use the mutex below to be
     * able to return the first error message.
     */
    Error *error;
    /* mutex to protect the error above */
    QemuMutex error_mutex;

    /* Do we have to clean up -b/-i from old migrate parameters */
    /* This feature is deprecated and will be removed */
    bool must_remove_block_options;

    /*
     * Global switch on whether we need to store the global state
     * during migration.
     */
    bool store_global_state;

    /* Whether we send QEMU_VM_CONFIGURATION during migration */
    bool send_configuration;
    /* Whether we send section footer during migration */
    bool send_section_footer;

    /* Needed by postcopy-pause state */
    QemuSemaphore postcopy_pause_sem;
    QemuSemaphore postcopy_pause_rp_sem;
    /*
     * Whether we abort the migration if decompression errors are
     * detected at the destination.  It is left at false for qemu
     * older than 3.0, since only newer qemu sends streams that
     * do not trigger spurious decompression errors.
     */
    bool decompress_error_check;

    /*
     * This decides the size of guest memory chunk that will be used
     * to track dirty bitmap clearing.  The size of a memory chunk will
     * be GUEST_PAGE_SIZE << N.  Say, N=0 means we will clear the dirty
     * bitmap for each page to send (1<<0=1); N=10 means we will clear
     * the dirty bitmap only once for 1<<10=1K contiguous guest pages
     * (which is a 4M chunk).
     */
    uint8_t clear_bitmap_shift;

    /*
     * This saves the hostname when the outgoing migration starts
     */
    char *hostname;
};
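
/*
 * Illustrative sketch, not part of the original header: clamping a
 * user-supplied clear_bitmap_shift into the supported range defined by
 * CLEAR_BITMAP_SHIFT_MIN/MAX above.  The helper name is a hypothetical
 * addition for illustration.
 */
static inline uint8_t clear_bitmap_shift_clamp(uint8_t shift)
{
    if (shift < CLEAR_BITMAP_SHIFT_MIN) {
        return CLEAR_BITMAP_SHIFT_MIN;
    } else if (shift > CLEAR_BITMAP_SHIFT_MAX) {
        return CLEAR_BITMAP_SHIFT_MAX;
    }
    return shift;
}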

void migrate_set_state(int *state, int old_state, int new_state);

void migration_fd_process_incoming(QEMUFile *f, Error **errp);
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
void migration_incoming_process(void);

bool  migration_has_all_channels(void);

uint64_t migrate_max_downtime(void);

void migrate_set_error(MigrationState *s, const Error *error);
void migrate_fd_error(MigrationState *s, const Error *error);

void migrate_fd_connect(MigrationState *s, Error *error_in);

bool migration_is_setup_or_active(int state);
bool migration_is_running(int state);

void migrate_init(MigrationState *s);
bool migration_is_blocked(Error **errp);
/* True if outgoing migration has entered postcopy phase */
bool migration_in_postcopy(void);
MigrationState *migrate_get_current(void);

bool migrate_postcopy(void);

bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
bool migrate_dirty_bitmaps(void);
bool migrate_ignore_shared(void);
bool migrate_validate_uuid(void);

bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
bool migrate_pause_before_switchover(void);
int migrate_multifd_channels(void);
MultiFDCompression migrate_multifd_compression(void);
int migrate_multifd_zlib_level(void);
int migrate_multifd_zstd_level(void);

int migrate_use_xbzrle(void);
uint64_t migrate_xbzrle_cache_size(void);
bool migrate_colo_enabled(void);

bool migrate_use_block(void);
bool migrate_use_block_incremental(void);
int migrate_max_cpu_throttle(void);
bool migrate_use_return_path(void);

uint64_t ram_get_total_transferred_pages(void);

bool migrate_use_compression(void);
int migrate_compress_level(void);
int migrate_compress_threads(void);
int migrate_compress_wait_thread(void);
int migrate_decompress_threads(void);
bool migrate_use_events(void);
bool migrate_postcopy_blocktime(void);
bool migrate_background_snapshot(void);

/* Sending on the return path - generic and then for each message type */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value);
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value);
int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
                              ram_addr_t start, uint64_t haddr);
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start);
void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name);
void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);
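
/*
 * Illustrative sketch, not part of the original header: during postcopy
 * recovery the destination acknowledges a successful resume by sending
 * MIGRATION_RESUME_ACK_VALUE back on the return path, tying together
 * the define near the top of this file and the declaration above.  The
 * helper name is a hypothetical addition for illustration.
 */
static inline void migration_ack_resume(MigrationIncomingState *mis)
{
    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
}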

void dirty_bitmap_mig_before_vm_start(void);
void dirty_bitmap_mig_cancel_outgoing(void);
void dirty_bitmap_mig_cancel_incoming(void);
bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm,
                                      Error **errp);

void migrate_add_address(SocketAddress *address);

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);

#define qemu_ram_foreach_block \
  #warning "Use foreach_not_ignored_block in migration code"

void migration_make_urgent_request(void);
void migration_consume_urgent_request(void);
bool migration_rate_limit(void);
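
/*
 * Illustrative sketch, not part of the original header: a sender's
 * iteration loop can call migration_rate_limit() after each unit of
 * work; it sleeps while over the bandwidth limit and wakes early when
 * another thread posts an urgent request via
 * migration_make_urgent_request().  The loop shape and the send_more
 * callback are assumptions for illustration only.
 */
static inline void migration_send_iteration(bool (*send_more)(void))
{
    bool more = true;

    while (more) {
        more = send_more();
        /* Sleeps if rate-limited; returns true if woken by an urgent request */
        migration_rate_limit();
    }
}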

#endif