qemu/migration/multifd-nocomp.c
<<
>>
Prefs
   1/*
   2 * Multifd RAM migration without compression
   3 *
   4 * Copyright (c) 2019-2020 Red Hat Inc
   5 *
   6 * Authors:
   7 *  Juan Quintela <quintela@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "system/ramblock.h"
  15#include "exec/target_page.h"
  16#include "file.h"
  17#include "migration-stats.h"
  18#include "multifd.h"
  19#include "options.h"
  20#include "migration.h"
  21#include "qapi/error.h"
  22#include "qemu/cutils.h"
  23#include "qemu/error-report.h"
  24#include "trace.h"
  25#include "qemu-file.h"
  26
/*
 * Staging payload for RAM pages queued through multifd_queue_page().
 * Allocated in multifd_ram_save_setup(), passed by address to
 * multifd_send(), and released in multifd_ram_save_cleanup().
 */
static MultiFDSendData *multifd_ram_send;
  28
  29void multifd_ram_payload_alloc(MultiFDPages_t *pages)
  30{
  31    pages->offset = g_new0(ram_addr_t, multifd_ram_page_count());
  32}
  33
  34void multifd_ram_payload_free(MultiFDPages_t *pages)
  35{
  36    g_clear_pointer(&pages->offset, g_free);
  37}
  38
  39void multifd_ram_save_setup(void)
  40{
  41    multifd_ram_send = multifd_send_data_alloc();
  42}
  43
  44void multifd_ram_save_cleanup(void)
  45{
  46    g_clear_pointer(&multifd_ram_send, multifd_send_data_free);
  47}
  48
  49static void multifd_set_file_bitmap(MultiFDSendParams *p)
  50{
  51    MultiFDPages_t *pages = &p->data->u.ram;
  52
  53    assert(pages->block);
  54
  55    for (int i = 0; i < pages->normal_num; i++) {
  56        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
  57    }
  58
  59    for (int i = pages->normal_num; i < pages->num; i++) {
  60        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
  61    }
  62}
  63
  64static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
  65{
  66    uint32_t page_count = multifd_ram_page_count();
  67
  68    if (migrate_zero_copy_send()) {
  69        p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
  70    }
  71
  72    if (!migrate_mapped_ram()) {
  73        /* We need one extra place for the packet header */
  74        p->iov = g_new0(struct iovec, page_count + 1);
  75    } else {
  76        p->iov = g_new0(struct iovec, page_count);
  77    }
  78
  79    return 0;
  80}
  81
  82static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
  83{
  84    g_free(p->iov);
  85    p->iov = NULL;
  86}
  87
  88static void multifd_ram_prepare_header(MultiFDSendParams *p)
  89{
  90    p->iov[0].iov_len = p->packet_len;
  91    p->iov[0].iov_base = p->packet;
  92    p->iovs_num++;
  93}
  94
  95static void multifd_send_prepare_iovs(MultiFDSendParams *p)
  96{
  97    MultiFDPages_t *pages = &p->data->u.ram;
  98    uint32_t page_size = multifd_ram_page_size();
  99
 100    for (int i = 0; i < pages->normal_num; i++) {
 101        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
 102        p->iov[p->iovs_num].iov_len = page_size;
 103        p->iovs_num++;
 104    }
 105
 106    p->next_packet_size = pages->normal_num * page_size;
 107}
 108
 109static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
 110{
 111    bool use_zero_copy_send = migrate_zero_copy_send();
 112    int ret;
 113
 114    multifd_send_zero_page_detect(p);
 115
 116    if (migrate_mapped_ram()) {
 117        multifd_send_prepare_iovs(p);
 118        multifd_set_file_bitmap(p);
 119
 120        return 0;
 121    }
 122
 123    if (!use_zero_copy_send) {
 124        /*
 125         * Only !zerocopy needs the header in IOV; zerocopy will
 126         * send it separately.
 127         */
 128        multifd_ram_prepare_header(p);
 129    }
 130
 131    multifd_send_prepare_iovs(p);
 132    p->flags |= MULTIFD_FLAG_NOCOMP;
 133
 134    multifd_send_fill_packet(p);
 135
 136    if (use_zero_copy_send) {
 137        /* Send header first, without zerocopy */
 138        ret = qio_channel_write_all(p->c, (void *)p->packet,
 139                                    p->packet_len, errp);
 140        if (ret != 0) {
 141            return -1;
 142        }
 143
 144        stat64_add(&mig_stats.multifd_bytes, p->packet_len);
 145    }
 146
 147    return 0;
 148}
 149
 150static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
 151{
 152    p->iov = g_new0(struct iovec, multifd_ram_page_count());
 153    return 0;
 154}
 155
 156static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
 157{
 158    g_free(p->iov);
 159    p->iov = NULL;
 160}
 161
 162static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
 163{
 164    uint32_t flags;
 165
 166    if (migrate_mapped_ram()) {
 167        return multifd_file_recv_data(p, errp);
 168    }
 169
 170    flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
 171
 172    if (flags != MULTIFD_FLAG_NOCOMP) {
 173        error_setg(errp, "multifd %u: flags received %x flags expected %x",
 174                   p->id, flags, MULTIFD_FLAG_NOCOMP);
 175        return -1;
 176    }
 177
 178    multifd_recv_zero_page_process(p);
 179
 180    if (!p->normal_num) {
 181        return 0;
 182    }
 183
 184    for (int i = 0; i < p->normal_num; i++) {
 185        p->iov[i].iov_base = p->host + p->normal[i];
 186        p->iov[i].iov_len = multifd_ram_page_size();
 187        ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
 188    }
 189    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
 190}
 191
 192static void multifd_pages_reset(MultiFDPages_t *pages)
 193{
 194    /*
 195     * We don't need to touch offset[] array, because it will be
 196     * overwritten later when reused.
 197     */
 198    pages->num = 0;
 199    pages->normal_num = 0;
 200    pages->block = NULL;
 201}
 202
 203void multifd_ram_fill_packet(MultiFDSendParams *p)
 204{
 205    MultiFDPacket_t *packet = p->packet;
 206    MultiFDPages_t *pages = &p->data->u.ram;
 207    uint32_t zero_num = pages->num - pages->normal_num;
 208
 209    packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
 210    packet->normal_pages = cpu_to_be32(pages->normal_num);
 211    packet->zero_pages = cpu_to_be32(zero_num);
 212
 213    if (pages->block) {
 214        pstrcpy(packet->ramblock, sizeof(packet->ramblock),
 215                pages->block->idstr);
 216    }
 217
 218    for (int i = 0; i < pages->num; i++) {
 219        /* there are architectures where ram_addr_t is 32 bit */
 220        uint64_t temp = pages->offset[i];
 221
 222        packet->offset[i] = cpu_to_be64(temp);
 223    }
 224
 225    trace_multifd_send_ram_fill(p->id, pages->normal_num,
 226                                zero_num);
 227}
 228
 229int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
 230{
 231    MultiFDPacket_t *packet = p->packet;
 232    uint32_t page_count = multifd_ram_page_count();
 233    uint32_t page_size = multifd_ram_page_size();
 234    uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
 235    int i;
 236
 237    if (pages_per_packet > page_count) {
 238        error_setg(errp, "multifd: received packet with %u pages, expected %u",
 239                   pages_per_packet, page_count);
 240        return -1;
 241    }
 242
 243    p->normal_num = be32_to_cpu(packet->normal_pages);
 244    if (p->normal_num > pages_per_packet) {
 245        error_setg(errp, "multifd: received packet with %u non-zero pages, "
 246                   "which exceeds maximum expected pages %u",
 247                   p->normal_num, pages_per_packet);
 248        return -1;
 249    }
 250
 251    p->zero_num = be32_to_cpu(packet->zero_pages);
 252    if (p->zero_num > pages_per_packet - p->normal_num) {
 253        error_setg(errp,
 254                   "multifd: received packet with %u zero pages, expected maximum %u",
 255                   p->zero_num, pages_per_packet - p->normal_num);
 256        return -1;
 257    }
 258
 259    if (p->normal_num == 0 && p->zero_num == 0) {
 260        return 0;
 261    }
 262
 263    /* make sure that ramblock is 0 terminated */
 264    packet->ramblock[255] = 0;
 265    p->block = qemu_ram_block_by_name(packet->ramblock);
 266    if (!p->block) {
 267        error_setg(errp, "multifd: unknown ram block %s",
 268                   packet->ramblock);
 269        return -1;
 270    }
 271
 272    p->host = p->block->host;
 273    for (i = 0; i < p->normal_num; i++) {
 274        uint64_t offset = be64_to_cpu(packet->offset[i]);
 275
 276        if (offset > (p->block->used_length - page_size)) {
 277            error_setg(errp, "multifd: offset too long %" PRIu64
 278                       " (max " RAM_ADDR_FMT ")",
 279                       offset, p->block->used_length);
 280            return -1;
 281        }
 282        p->normal[i] = offset;
 283    }
 284
 285    for (i = 0; i < p->zero_num; i++) {
 286        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
 287
 288        if (offset > (p->block->used_length - page_size)) {
 289            error_setg(errp, "multifd: offset too long %" PRIu64
 290                       " (max " RAM_ADDR_FMT ")",
 291                       offset, p->block->used_length);
 292            return -1;
 293        }
 294        p->zero[i] = offset;
 295    }
 296
 297    return 0;
 298}
 299
 300static inline bool multifd_queue_empty(MultiFDPages_t *pages)
 301{
 302    return pages->num == 0;
 303}
 304
 305static inline bool multifd_queue_full(MultiFDPages_t *pages)
 306{
 307    return pages->num == multifd_ram_page_count();
 308}
 309
 310static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
 311{
 312    pages->offset[pages->num++] = offset;
 313}
 314
 315/* Returns true if enqueue successful, false otherwise */
 316bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
 317{
 318    MultiFDPages_t *pages;
 319
 320retry:
 321    pages = &multifd_ram_send->u.ram;
 322
 323    if (multifd_payload_empty(multifd_ram_send)) {
 324        multifd_pages_reset(pages);
 325        multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
 326    }
 327
 328    /* If the queue is empty, we can already enqueue now */
 329    if (multifd_queue_empty(pages)) {
 330        pages->block = block;
 331        multifd_enqueue(pages, offset);
 332        return true;
 333    }
 334
 335    /*
 336     * Not empty, meanwhile we need a flush.  It can because of either:
 337     *
 338     * (1) The page is not on the same ramblock of previous ones, or,
 339     * (2) The queue is full.
 340     *
 341     * After flush, always retry.
 342     */
 343    if (pages->block != block || multifd_queue_full(pages)) {
 344        if (!multifd_send(&multifd_ram_send)) {
 345            return false;
 346        }
 347        goto retry;
 348    }
 349
 350    /* Not empty, and we still have space, do it! */
 351    multifd_enqueue(pages, offset);
 352    return true;
 353}
 354
/*
 * We have two modes for multifd flushes:
 *
 * - Per-section mode: this is the legacy way to flush; it requires one
 *   MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
 *
 * - Per-round mode: this is the modern way to flush; it requires only one
 *   MULTIFD_FLAG_SYNC message for each round of RAM scan.  Normally
 *   it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in
 *   network-based migrations.
 *
 * Note that mapped-ram always uses the modern way to sync.
 */
 368
 369/* Do we need a per-section multifd flush (legacy way)? */
 370bool multifd_ram_sync_per_section(void)
 371{
 372    if (!migrate_multifd()) {
 373        return false;
 374    }
 375
 376    if (migrate_mapped_ram()) {
 377        return false;
 378    }
 379
 380    return migrate_multifd_flush_after_each_section();
 381}
 382
 383/* Do we need a per-round multifd flush (modern way)? */
 384bool multifd_ram_sync_per_round(void)
 385{
 386    if (!migrate_multifd()) {
 387        return false;
 388    }
 389
 390    if (migrate_mapped_ram()) {
 391        return true;
 392    }
 393
 394    return !migrate_multifd_flush_after_each_section();
 395}
 396
 397int multifd_ram_flush_and_sync(QEMUFile *f)
 398{
 399    MultiFDSyncReq req;
 400    int ret;
 401
 402    if (!migrate_multifd() || migration_in_postcopy()) {
 403        return 0;
 404    }
 405
 406    if (!multifd_payload_empty(multifd_ram_send)) {
 407        if (!multifd_send(&multifd_ram_send)) {
 408            error_report("%s: multifd_send fail", __func__);
 409            return -1;
 410        }
 411    }
 412
 413    /* File migrations only need to sync with threads */
 414    req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;
 415
 416    ret = multifd_send_sync_main(req);
 417    if (ret) {
 418        return ret;
 419    }
 420
 421    /* If we don't need to sync with remote at all, nothing else to do */
 422    if (req == MULTIFD_SYNC_LOCAL) {
 423        return 0;
 424    }
 425
 426    /*
 427     * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies
 428     * on RAM_SAVE_FLAG_EOS instead.
 429     */
 430    if (migrate_multifd_flush_after_each_section()) {
 431        return 0;
 432    }
 433
 434    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
 435    qemu_fflush(f);
 436
 437    return 0;
 438}
 439
 440bool multifd_send_prepare_common(MultiFDSendParams *p)
 441{
 442    MultiFDPages_t *pages = &p->data->u.ram;
 443    multifd_ram_prepare_header(p);
 444    multifd_send_zero_page_detect(p);
 445
 446    if (!pages->normal_num) {
 447        p->next_packet_size = 0;
 448        return false;
 449    }
 450
 451    return true;
 452}
 453
 454static const MultiFDMethods multifd_nocomp_ops = {
 455    .send_setup = multifd_nocomp_send_setup,
 456    .send_cleanup = multifd_nocomp_send_cleanup,
 457    .send_prepare = multifd_nocomp_send_prepare,
 458    .recv_setup = multifd_nocomp_recv_setup,
 459    .recv_cleanup = multifd_nocomp_recv_cleanup,
 460    .recv = multifd_nocomp_recv
 461};
 462
 463static void multifd_nocomp_register(void)
 464{
 465    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
 466}
 467
 468migration_init(multifd_nocomp_register);
 469