qemu/migration/dirtyrate.c
<<
>>
Prefs
   1/*
   2 * Dirtyrate implement code
   3 *
   4 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
   5 *
   6 * Authors:
   7 *  Chuan Zheng <zhengchuan@huawei.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "qemu/error-report.h"
  15#include "hw/core/cpu.h"
  16#include "qapi/error.h"
  17#include "system/ramblock.h"
  18#include "exec/target_page.h"
  19#include "qemu/rcu_queue.h"
  20#include "qemu/main-loop.h"
  21#include "qapi/qapi-commands-migration.h"
  22#include "ram.h"
  23#include "trace.h"
  24#include "dirtyrate.h"
  25#include "monitor/hmp.h"
  26#include "monitor/monitor.h"
  27#include "qobject/qdict.h"
  28#include "system/kvm.h"
  29#include "system/runstate.h"
  30#include "system/memory.h"
  31#include "qemu/xxhash.h"
  32#include "migration.h"
  33
/*
 * total_dirty_pages is protected by the BQL and is used
 * to count the pages dirtied during the period between two
 * memory_global_dirty_log_sync calls
 */
uint64_t total_dirty_pages;
  40
/*
 * Pair of dirty-page counter snapshots: one taken when a measurement
 * starts and one taken when it ends.
 */
typedef struct DirtyPageRecord {
    uint64_t start_pages; /* counter value sampled at the start */
    uint64_t end_pages;   /* counter value sampled at the end */
} DirtyPageRecord;
  45
/* Measurement state; changed via qatomic_cmpxchg() in dirtyrate_set_state() */
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
/* Parameters and results of the most recently requested measurement */
static struct DirtyRateStat DirtyStat;
/* Mode selected by the most recent calc-dirty-rate request */
static DirtyRateMeasureMode dirtyrate_mode =
                DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
  50
  51static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
  52{
  53    int64_t current_time;
  54
  55    current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  56    if ((current_time - initial_time) >= msec) {
  57        msec = current_time - initial_time;
  58    } else {
  59        g_usleep((msec + initial_time - current_time) * 1000);
  60        /* g_usleep may overshoot */
  61        msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
  62    }
  63
  64    return msec;
  65}
  66
  67static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
  68                                     CPUState *cpu, bool start)
  69{
  70    if (start) {
  71        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
  72    } else {
  73        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
  74    }
  75}
  76
  77static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
  78                                      int64_t calc_time_ms)
  79{
  80    uint64_t increased_dirty_pages =
  81        dirty_pages.end_pages - dirty_pages.start_pages;
  82
  83    /*
  84     * multiply by 1000ms/s _before_ converting down to megabytes
  85     * to avoid losing precision
  86     */
  87    return qemu_target_pages_to_MiB(increased_dirty_pages * 1000) /
  88        calc_time_ms;
  89}
  90
  91void global_dirty_log_change(unsigned int flag, bool start)
  92{
  93    Error *local_err = NULL;
  94    bool ret;
  95
  96    bql_lock();
  97    if (start) {
  98        ret = memory_global_dirty_log_start(flag, &local_err);
  99        if (!ret) {
 100            error_report_err(local_err);
 101        }
 102    } else {
 103        memory_global_dirty_log_stop(flag);
 104    }
 105    bql_unlock();
 106}
 107
 108/*
 109 * global_dirty_log_sync
 110 * 1. sync dirty log from kvm
 111 * 2. stop dirty tracking if needed.
 112 */
 113static void global_dirty_log_sync(unsigned int flag, bool one_shot)
 114{
 115    bql_lock();
 116    memory_global_dirty_log_sync(false);
 117    if (one_shot) {
 118        memory_global_dirty_log_stop(flag);
 119    }
 120    bql_unlock();
 121}
 122
 123static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
 124{
 125    CPUState *cpu;
 126    int nvcpu = 0;
 127
 128    CPU_FOREACH(cpu) {
 129        nvcpu++;
 130    }
 131
 132    stat->nvcpu = nvcpu;
 133    stat->rates = g_new0(DirtyRateVcpu, nvcpu);
 134
 135    return g_new0(DirtyPageRecord, nvcpu);
 136}
 137
 138static void vcpu_dirty_stat_collect(DirtyPageRecord *records,
 139                                    bool start)
 140{
 141    CPUState *cpu;
 142
 143    CPU_FOREACH(cpu) {
 144        record_dirtypages(records, cpu, start);
 145    }
 146}
 147
 148int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
 149                                 VcpuStat *stat,
 150                                 unsigned int flag,
 151                                 bool one_shot)
 152{
 153    DirtyPageRecord *records = NULL;
 154    int64_t init_time_ms;
 155    int64_t duration;
 156    int64_t dirtyrate;
 157    int i = 0;
 158    unsigned int gen_id = 0;
 159
 160retry:
 161    init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 162
 163    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
 164        gen_id = cpu_list_generation_id_get();
 165        records = vcpu_dirty_stat_alloc(stat);
 166        vcpu_dirty_stat_collect(records, true);
 167    }
 168
 169    duration = dirty_stat_wait(calc_time_ms, init_time_ms);
 170
 171    global_dirty_log_sync(flag, one_shot);
 172
 173    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
 174        if (gen_id != cpu_list_generation_id_get()) {
 175            g_free(records);
 176            g_free(stat->rates);
 177            cpu_list_unlock();
 178            goto retry;
 179        }
 180        vcpu_dirty_stat_collect(records, false);
 181    }
 182
 183    for (i = 0; i < stat->nvcpu; i++) {
 184        dirtyrate = do_calculate_dirtyrate(records[i], duration);
 185
 186        stat->rates[i].id = i;
 187        stat->rates[i].dirty_rate = dirtyrate;
 188
 189        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
 190    }
 191
 192    g_free(records);
 193
 194    return duration;
 195}
 196
 197static bool is_calc_time_valid(int64_t msec)
 198{
 199    if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
 200        return false;
 201    }
 202
 203    return true;
 204}
 205
 206static bool is_sample_pages_valid(int64_t pages)
 207{
 208    return pages >= MIN_SAMPLE_PAGE_COUNT &&
 209           pages <= MAX_SAMPLE_PAGE_COUNT;
 210}
 211
 212static int dirtyrate_set_state(int *state, int old_state, int new_state)
 213{
 214    assert(new_state < DIRTY_RATE_STATUS__MAX);
 215    trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
 216    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
 217        return 0;
 218    } else {
 219        return -1;
 220    }
 221}
 222
 223/* Decimal power of given time unit relative to one second */
 224static int time_unit_to_power(TimeUnit time_unit)
 225{
 226    switch (time_unit) {
 227    case TIME_UNIT_SECOND:
 228        return 0;
 229    case TIME_UNIT_MILLISECOND:
 230        return -3;
 231    default:
 232        g_assert_not_reached();
 233    }
 234}
 235
 236static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
 237                                 TimeUnit unit_to)
 238{
 239    int power = time_unit_to_power(unit_from) -
 240                time_unit_to_power(unit_to);
 241    while (power < 0) {
 242        value /= 10;
 243        power += 1;
 244    }
 245    while (power > 0) {
 246        value *= 10;
 247        power -= 1;
 248    }
 249    return value;
 250}
 251
 252
 253static struct DirtyRateInfo *
 254query_dirty_rate_info(TimeUnit calc_time_unit)
 255{
 256    int i;
 257    int64_t dirty_rate = DirtyStat.dirty_rate;
 258    struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
 259    DirtyRateVcpuList *head = NULL, **tail = &head;
 260
 261    info->status = CalculatingState;
 262    info->start_time = DirtyStat.start_time;
 263    info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
 264                                        TIME_UNIT_MILLISECOND,
 265                                        calc_time_unit);
 266    info->calc_time_unit = calc_time_unit;
 267    info->sample_pages = DirtyStat.sample_pages;
 268    info->mode = dirtyrate_mode;
 269
 270    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
 271        info->has_dirty_rate = true;
 272        info->dirty_rate = dirty_rate;
 273
 274        if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
 275            /*
 276             * set sample_pages with 0 to indicate page sampling
 277             * isn't enabled
 278             **/
 279            info->sample_pages = 0;
 280            info->has_vcpu_dirty_rate = true;
 281            for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
 282                DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
 283                rate->id = DirtyStat.dirty_ring.rates[i].id;
 284                rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
 285                QAPI_LIST_APPEND(tail, rate);
 286            }
 287            info->vcpu_dirty_rate = head;
 288        }
 289
 290        if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
 291            info->sample_pages = 0;
 292        }
 293    }
 294
 295    trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
 296
 297    return info;
 298}
 299
 300static void init_dirtyrate_stat(struct DirtyRateConfig config)
 301{
 302    DirtyStat.dirty_rate = -1;
 303    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 304    DirtyStat.calc_time_ms = config.calc_time_ms;
 305    DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
 306
 307    switch (config.mode) {
 308    case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
 309        DirtyStat.page_sampling.total_dirty_samples = 0;
 310        DirtyStat.page_sampling.total_sample_count = 0;
 311        DirtyStat.page_sampling.total_block_mem_MB = 0;
 312        break;
 313    case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
 314        DirtyStat.dirty_ring.nvcpu = -1;
 315        DirtyStat.dirty_ring.rates = NULL;
 316        break;
 317    default:
 318        break;
 319    }
 320}
 321
 322static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
 323{
 324    /* last calc-dirty-rate qmp use dirty ring mode */
 325    if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
 326        free(DirtyStat.dirty_ring.rates);
 327        DirtyStat.dirty_ring.rates = NULL;
 328    }
 329}
 330
 331static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
 332{
 333    DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
 334    DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
 335    /* size of total pages in MB */
 336    DirtyStat.page_sampling.total_block_mem_MB +=
 337        qemu_target_pages_to_MiB(info->ramblock_pages);
 338}
 339
 340static void update_dirtyrate(uint64_t msec)
 341{
 342    uint64_t dirtyrate;
 343    uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
 344    uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
 345    uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
 346
 347    dirtyrate = total_dirty_samples * total_block_mem_MB *
 348                1000 / (total_sample_count * msec);
 349
 350    DirtyStat.dirty_rate = dirtyrate;
 351}
 352
 353/*
 354 * Compute hash of a single page of size TARGET_PAGE_SIZE.
 355 */
 356static uint32_t compute_page_hash(void *ptr)
 357{
 358    size_t page_size = qemu_target_page_size();
 359    uint32_t i;
 360    uint64_t v1, v2, v3, v4;
 361    uint64_t res;
 362    const uint64_t *p = ptr;
 363
 364    v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
 365    v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
 366    v3 = QEMU_XXHASH_SEED + 0;
 367    v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
 368    for (i = 0; i < page_size / 8; i += 4) {
 369        v1 = XXH64_round(v1, p[i + 0]);
 370        v2 = XXH64_round(v2, p[i + 1]);
 371        v3 = XXH64_round(v3, p[i + 2]);
 372        v4 = XXH64_round(v4, p[i + 3]);
 373    }
 374    res = XXH64_mergerounds(v1, v2, v3, v4);
 375    res += page_size;
 376    res = XXH64_avalanche(res);
 377    return (uint32_t)(res & UINT32_MAX);
 378}
 379
 380
 381/*
 382 * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
 383 * in ramblock, which starts from ramblock base address.
 384 */
 385static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
 386                                      uint64_t vfn)
 387{
 388    uint32_t hash;
 389
 390    hash = compute_page_hash(info->ramblock_addr +
 391                             vfn * qemu_target_page_size());
 392
 393    trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
 394    return hash;
 395}
 396
 397static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
 398{
 399    unsigned int sample_pages_count;
 400    int i;
 401    GRand *rand;
 402
 403    sample_pages_count = info->sample_pages_count;
 404
 405    /* ramblock size less than one page, return success to skip this ramblock */
 406    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
 407        return true;
 408    }
 409
 410    info->hash_result = g_try_malloc0_n(sample_pages_count,
 411                                        sizeof(uint32_t));
 412    if (!info->hash_result) {
 413        return false;
 414    }
 415
 416    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
 417                                            sizeof(uint64_t));
 418    if (!info->sample_page_vfn) {
 419        g_free(info->hash_result);
 420        return false;
 421    }
 422
 423    rand  = g_rand_new();
 424    for (i = 0; i < sample_pages_count; i++) {
 425        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
 426                                                    info->ramblock_pages - 1);
 427        info->hash_result[i] = get_ramblock_vfn_hash(info,
 428                                                     info->sample_page_vfn[i]);
 429    }
 430    g_rand_free(rand);
 431
 432    return true;
 433}
 434
 435static void get_ramblock_dirty_info(RAMBlock *block,
 436                                    struct RamblockDirtyInfo *info,
 437                                    struct DirtyRateConfig *config)
 438{
 439    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
 440    gsize len;
 441
 442    /* Right shift 30 bits to calc ramblock size in GB */
 443    info->sample_pages_count = (qemu_ram_get_used_length(block) *
 444                                sample_pages_per_gigabytes) >> 30;
 445    /* Right shift TARGET_PAGE_BITS to calc page count */
 446    info->ramblock_pages = qemu_ram_get_used_length(block) >>
 447                           qemu_target_page_bits();
 448    info->ramblock_addr = qemu_ram_get_host_addr(block);
 449    len = g_strlcpy(info->idstr, qemu_ram_get_idstr(block),
 450                    sizeof(info->idstr));
 451    g_assert(len < sizeof(info->idstr));
 452}
 453
 454static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
 455{
 456    int i;
 457
 458    if (!infos) {
 459        return;
 460    }
 461
 462    for (i = 0; i < count; i++) {
 463        g_free(infos[i].sample_page_vfn);
 464        g_free(infos[i].hash_result);
 465    }
 466    g_free(infos);
 467}
 468
 469static bool skip_sample_ramblock(RAMBlock *block)
 470{
 471    /*
 472     * Sample only blocks larger than MIN_RAMBLOCK_SIZE.
 473     */
 474    if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
 475        trace_skip_sample_ramblock(block->idstr,
 476                                   qemu_ram_get_used_length(block));
 477        return true;
 478    }
 479
 480    return false;
 481}
 482
 483static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
 484                                      struct DirtyRateConfig config,
 485                                      int *block_count)
 486{
 487    struct RamblockDirtyInfo *info = NULL;
 488    struct RamblockDirtyInfo *dinfo = NULL;
 489    RAMBlock *block = NULL;
 490    int total_count = 0;
 491    int index = 0;
 492    bool ret = false;
 493
 494    RAMBLOCK_FOREACH_MIGRATABLE(block) {
 495        if (skip_sample_ramblock(block)) {
 496            continue;
 497        }
 498        total_count++;
 499    }
 500
 501    dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
 502    if (dinfo == NULL) {
 503        goto out;
 504    }
 505
 506    RAMBLOCK_FOREACH_MIGRATABLE(block) {
 507        if (skip_sample_ramblock(block)) {
 508            continue;
 509        }
 510        if (index >= total_count) {
 511            break;
 512        }
 513        info = &dinfo[index];
 514        get_ramblock_dirty_info(block, info, &config);
 515        if (!save_ramblock_hash(info)) {
 516            goto out;
 517        }
 518        index++;
 519    }
 520    ret = true;
 521
 522out:
 523    *block_count = index;
 524    *block_dinfo = dinfo;
 525    return ret;
 526}
 527
 528static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
 529{
 530    uint32_t hash;
 531    int i;
 532
 533    for (i = 0; i < info->sample_pages_count; i++) {
 534        hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
 535        if (hash != info->hash_result[i]) {
 536            trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
 537            info->sample_dirty_count++;
 538        }
 539    }
 540}
 541
 542static struct RamblockDirtyInfo *
 543find_block_matched(RAMBlock *block, int count,
 544                  struct RamblockDirtyInfo *infos)
 545{
 546    int i;
 547
 548    for (i = 0; i < count; i++) {
 549        if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
 550            break;
 551        }
 552    }
 553
 554    if (i == count) {
 555        return NULL;
 556    }
 557
 558    if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
 559        infos[i].ramblock_pages !=
 560            (qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
 561        trace_find_page_matched(block->idstr);
 562        return NULL;
 563    }
 564
 565    return &infos[i];
 566}
 567
 568static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
 569                                  int block_count)
 570{
 571    struct RamblockDirtyInfo *block_dinfo = NULL;
 572    RAMBlock *block = NULL;
 573
 574    RAMBLOCK_FOREACH_MIGRATABLE(block) {
 575        if (skip_sample_ramblock(block)) {
 576            continue;
 577        }
 578        block_dinfo = find_block_matched(block, block_count, info);
 579        if (block_dinfo == NULL) {
 580            continue;
 581        }
 582        calc_page_dirty_rate(block_dinfo);
 583        update_dirtyrate_stat(block_dinfo);
 584    }
 585
 586    if (DirtyStat.page_sampling.total_sample_count == 0) {
 587        return false;
 588    }
 589
 590    return true;
 591}
 592
 593static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
 594                                            bool start)
 595{
 596    if (start) {
 597        dirty_pages->start_pages = total_dirty_pages;
 598    } else {
 599        dirty_pages->end_pages = total_dirty_pages;
 600    }
 601}
 602
 603static inline void dirtyrate_manual_reset_protect(void)
 604{
 605    RAMBlock *block = NULL;
 606
 607    WITH_RCU_READ_LOCK_GUARD() {
 608        RAMBLOCK_FOREACH_MIGRATABLE(block) {
 609            memory_region_clear_dirty_bitmap(block->mr, 0,
 610                                             block->used_length);
 611        }
 612    }
 613}
 614
 615static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
 616{
 617    int64_t start_time;
 618    DirtyPageRecord dirty_pages;
 619    Error *local_err = NULL;
 620
 621    bql_lock();
 622    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
 623        error_report_err(local_err);
 624    }
 625
 626    /*
 627     * 1'round of log sync may return all 1 bits with
 628     * KVM_DIRTY_LOG_INITIALLY_SET enable
 629     * skip it unconditionally and start dirty tracking
 630     * from 2'round of log sync
 631     */
 632    memory_global_dirty_log_sync(false);
 633
 634    /*
 635     * reset page protect manually and unconditionally.
 636     * this make sure kvm dirty log be cleared if
 637     * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
 638     */
 639    dirtyrate_manual_reset_protect();
 640    bql_unlock();
 641
 642    record_dirtypages_bitmap(&dirty_pages, true);
 643
 644    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 645    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 646
 647    DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);
 648
 649    /*
 650     * do two things.
 651     * 1. fetch dirty bitmap from kvm
 652     * 2. stop dirty tracking
 653     */
 654    global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
 655
 656    record_dirtypages_bitmap(&dirty_pages, false);
 657
 658    DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
 659                                                  DirtyStat.calc_time_ms);
 660}
 661
 662static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
 663{
 664    uint64_t dirtyrate = 0;
 665    uint64_t dirtyrate_sum = 0;
 666    int i = 0;
 667
 668    /* start log sync */
 669    global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
 670
 671    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 672
 673    /* calculate vcpu dirtyrate */
 674    DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
 675                                                      &DirtyStat.dirty_ring,
 676                                                      GLOBAL_DIRTY_DIRTY_RATE,
 677                                                      true);
 678
 679    /* calculate vm dirtyrate */
 680    for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
 681        dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
 682        DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
 683        dirtyrate_sum += dirtyrate;
 684    }
 685
 686    DirtyStat.dirty_rate = dirtyrate_sum;
 687}
 688
 689static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
 690{
 691    struct RamblockDirtyInfo *block_dinfo = NULL;
 692    int block_count = 0;
 693    int64_t initial_time;
 694
 695    rcu_read_lock();
 696    initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 697    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
 698    if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
 699        goto out;
 700    }
 701    rcu_read_unlock();
 702
 703    DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
 704                                             initial_time);
 705
 706    rcu_read_lock();
 707    if (!compare_page_hash_info(block_dinfo, block_count)) {
 708        goto out;
 709    }
 710
 711    update_dirtyrate(DirtyStat.calc_time_ms);
 712
 713out:
 714    rcu_read_unlock();
 715    free_ramblock_dirty_info(block_dinfo, block_count);
 716}
 717
 718static void calculate_dirtyrate(struct DirtyRateConfig config)
 719{
 720    if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
 721        calculate_dirtyrate_dirty_bitmap(config);
 722    } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
 723        calculate_dirtyrate_dirty_ring(config);
 724    } else {
 725        calculate_dirtyrate_sample_vm(config);
 726    }
 727
 728    trace_dirtyrate_calculate(DirtyStat.dirty_rate);
 729}
 730
 731void *get_dirtyrate_thread(void *arg)
 732{
 733    struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
 734    int ret;
 735    rcu_register_thread();
 736
 737    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
 738                              DIRTY_RATE_STATUS_MEASURING);
 739    if (ret == -1) {
 740        error_report("change dirtyrate state failed.");
 741        return NULL;
 742    }
 743
 744    calculate_dirtyrate(config);
 745
 746    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
 747                              DIRTY_RATE_STATUS_MEASURED);
 748    if (ret == -1) {
 749        error_report("change dirtyrate state failed.");
 750    }
 751
 752    rcu_unregister_thread();
 753    return NULL;
 754}
 755
 756void qmp_calc_dirty_rate(int64_t calc_time,
 757                         bool has_calc_time_unit,
 758                         TimeUnit calc_time_unit,
 759                         bool has_sample_pages,
 760                         int64_t sample_pages,
 761                         bool has_mode,
 762                         DirtyRateMeasureMode mode,
 763                         Error **errp)
 764{
 765    static struct DirtyRateConfig config;
 766    QemuThread thread;
 767    int ret;
 768
 769    /*
 770     * If the dirty rate is already being measured, don't attempt to start.
 771     */
 772    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
 773        error_setg(errp, "the dirty rate is already being measured.");
 774        return;
 775    }
 776
 777    int64_t calc_time_ms = convert_time_unit(
 778        calc_time,
 779        has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
 780        TIME_UNIT_MILLISECOND
 781    );
 782
 783    if (!is_calc_time_valid(calc_time_ms)) {
 784        error_setg(errp, "Calculation time is out of range [%dms, %dms].",
 785                         MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
 786        return;
 787    }
 788
 789    if (!has_mode) {
 790        mode =  DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
 791    }
 792
 793    if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
 794        error_setg(errp, "sample-pages is used only in page-sampling mode");
 795        return;
 796    }
 797
 798    if (has_sample_pages) {
 799        if (!is_sample_pages_valid(sample_pages)) {
 800            error_setg(errp, "sample-pages is out of range[%d, %d].",
 801                            MIN_SAMPLE_PAGE_COUNT,
 802                            MAX_SAMPLE_PAGE_COUNT);
 803            return;
 804        }
 805    } else {
 806        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
 807    }
 808
 809    /*
 810     * dirty ring mode only works when kvm dirty ring is enabled.
 811     * on the contrary, dirty bitmap mode is not.
 812     */
 813    if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
 814        !kvm_dirty_ring_enabled()) ||
 815        ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
 816         kvm_dirty_ring_enabled())) {
 817        error_setg(errp, "mode %s is not enabled, use other method instead.",
 818                         DirtyRateMeasureMode_str(mode));
 819         return;
 820    }
 821
 822    /*
 823     * Init calculation state as unstarted.
 824     */
 825    ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
 826                              DIRTY_RATE_STATUS_UNSTARTED);
 827    if (ret == -1) {
 828        error_setg(errp, "init dirty rate calculation state failed.");
 829        return;
 830    }
 831
 832    config.calc_time_ms = calc_time_ms;
 833    config.sample_pages_per_gigabytes = sample_pages;
 834    config.mode = mode;
 835
 836    cleanup_dirtyrate_stat(config);
 837
 838    /*
 839     * update dirty rate mode so that we can figure out what mode has
 840     * been used in last calculation
 841     **/
 842    dirtyrate_mode = mode;
 843
 844    init_dirtyrate_stat(config);
 845
 846    qemu_thread_create(&thread, MIGRATION_THREAD_DIRTY_RATE,
 847                       get_dirtyrate_thread, (void *)&config,
 848                       QEMU_THREAD_DETACHED);
 849}
 850
 851
 852struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
 853                                           TimeUnit calc_time_unit,
 854                                           Error **errp)
 855{
 856    return query_dirty_rate_info(
 857        has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
 858}
 859
 860void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
 861{
 862    DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);
 863
 864    monitor_printf(mon, "Status: %s\n",
 865                   DirtyRateStatus_str(info->status));
 866    monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
 867                   info->start_time);
 868    if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
 869        monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
 870                       info->sample_pages);
 871    }
 872    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
 873                   info->calc_time);
 874    monitor_printf(mon, "Mode: %s\n",
 875                   DirtyRateMeasureMode_str(info->mode));
 876    monitor_printf(mon, "Dirty rate: ");
 877    if (info->has_dirty_rate) {
 878        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
 879        if (info->has_vcpu_dirty_rate) {
 880            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
 881            for (rate = head; rate != NULL; rate = rate->next) {
 882                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
 883                               " (MB/s)\n", rate->value->id,
 884                               rate->value->dirty_rate);
 885            }
 886        }
 887    } else {
 888        monitor_printf(mon, "(not ready)\n");
 889    }
 890
 891    qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
 892    g_free(info);
 893}
 894
 895void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
 896{
 897    int64_t sec = qdict_get_try_int(qdict, "second", 0);
 898    int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
 899    bool has_sample_pages = (sample_pages != -1);
 900    bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
 901    bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
 902    DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
 903    Error *err = NULL;
 904
 905    if (!sec) {
 906        monitor_printf(mon, "Incorrect period length specified!\n");
 907        return;
 908    }
 909
 910    if (dirty_ring && dirty_bitmap) {
 911        monitor_printf(mon, "Either dirty ring or dirty bitmap "
 912                       "can be specified!\n");
 913        return;
 914    }
 915
 916    if (dirty_bitmap) {
 917        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
 918    } else if (dirty_ring) {
 919        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
 920    }
 921
 922    qmp_calc_dirty_rate(sec, /* calc-time */
 923                        false, TIME_UNIT_SECOND, /* calc-time-unit */
 924                        has_sample_pages, sample_pages,
 925                        true, mode,
 926                        &err);
 927    if (err) {
 928        hmp_handle_error(mon, err);
 929        return;
 930    }
 931
 932    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
 933                   " seconds\n", sec);
 934    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
 935}
 936