qemu/softmmu/dirtylimit.c
<<
>>
Prefs
   1/*
   2 * Dirty page rate limit implementation code
   3 *
   4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
   5 *
   6 * Authors:
   7 *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "qemu/main-loop.h"
  15#include "qapi/qapi-commands-migration.h"
  16#include "qapi/qmp/qdict.h"
  17#include "qapi/error.h"
  18#include "sysemu/dirtyrate.h"
  19#include "sysemu/dirtylimit.h"
  20#include "monitor/hmp.h"
  21#include "monitor/monitor.h"
  22#include "exec/memory.h"
  23#include "exec/target_page.h"
  24#include "hw/boards.h"
  25#include "sysemu/kvm.h"
  26#include "trace.h"
  27#include "migration/misc.h"
  28#include "migration/migration.h"
  29#include "migration/options.h"
  30
  31/*
  32 * Dirtylimit stop working if dirty page rate error
  33 * value less than DIRTYLIMIT_TOLERANCE_RANGE
  34 */
  35#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
  36/*
  37 * Plus or minus vcpu sleep time linearly if dirty
  38 * page rate error value percentage over
  39 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
  40 * Otherwise, plus or minus a fixed vcpu sleep time.
  41 */
  42#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
  43/*
  44 * Max vcpu sleep time percentage during a cycle
  45 * composed of dirty ring full and sleep time.
  46 */
  47#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
  48
  49struct {
  50    VcpuStat stat;
  51    bool running;
  52    QemuThread thread;
  53} *vcpu_dirty_rate_stat;
  54
  55typedef struct VcpuDirtyLimitState {
  56    int cpu_index;
  57    bool enabled;
  58    /*
  59     * Quota dirty page rate, unit is MB/s
  60     * zero if not enabled.
  61     */
  62    uint64_t quota;
  63} VcpuDirtyLimitState;
  64
  65struct {
  66    VcpuDirtyLimitState *states;
  67    /* Max cpus number configured by user */
  68    int max_cpus;
  69    /* Number of vcpu under dirtylimit */
  70    int limited_nvcpu;
  71} *dirtylimit_state;
  72
  73/* protect dirtylimit_state */
  74static QemuMutex dirtylimit_mutex;
  75
  76/* dirtylimit thread quit if dirtylimit_quit is true */
  77static bool dirtylimit_quit;
  78
  79static void vcpu_dirty_rate_stat_collect(void)
  80{
  81    MigrationState *s = migrate_get_current();
  82    VcpuStat stat;
  83    int i = 0;
  84    int64_t period = DIRTYLIMIT_CALC_TIME_MS;
  85
  86    if (migrate_dirty_limit() &&
  87        migration_is_active(s)) {
  88        period = s->parameters.x_vcpu_dirty_limit_period;
  89    }
  90
  91    /* calculate vcpu dirtyrate */
  92    vcpu_calculate_dirtyrate(period,
  93                              &stat,
  94                              GLOBAL_DIRTY_LIMIT,
  95                              false);
  96
  97    for (i = 0; i < stat.nvcpu; i++) {
  98        vcpu_dirty_rate_stat->stat.rates[i].id = i;
  99        vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
 100            stat.rates[i].dirty_rate;
 101    }
 102
 103    free(stat.rates);
 104}
 105
 106static void *vcpu_dirty_rate_stat_thread(void *opaque)
 107{
 108    rcu_register_thread();
 109
 110    /* start log sync */
 111    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
 112
 113    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
 114        vcpu_dirty_rate_stat_collect();
 115        if (dirtylimit_in_service()) {
 116            dirtylimit_process();
 117        }
 118    }
 119
 120    /* stop log sync */
 121    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
 122
 123    rcu_unregister_thread();
 124    return NULL;
 125}
 126
 127int64_t vcpu_dirty_rate_get(int cpu_index)
 128{
 129    DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
 130    return qatomic_read_i64(&rates[cpu_index].dirty_rate);
 131}
 132
 133void vcpu_dirty_rate_stat_start(void)
 134{
 135    if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
 136        return;
 137    }
 138
 139    qatomic_set(&vcpu_dirty_rate_stat->running, 1);
 140    qemu_thread_create(&vcpu_dirty_rate_stat->thread,
 141                       "dirtyrate-stat",
 142                       vcpu_dirty_rate_stat_thread,
 143                       NULL,
 144                       QEMU_THREAD_JOINABLE);
 145}
 146
 147void vcpu_dirty_rate_stat_stop(void)
 148{
 149    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
 150    dirtylimit_state_unlock();
 151    qemu_mutex_unlock_iothread();
 152    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
 153    qemu_mutex_lock_iothread();
 154    dirtylimit_state_lock();
 155}
 156
 157void vcpu_dirty_rate_stat_initialize(void)
 158{
 159    MachineState *ms = MACHINE(qdev_get_machine());
 160    int max_cpus = ms->smp.max_cpus;
 161
 162    vcpu_dirty_rate_stat =
 163        g_malloc0(sizeof(*vcpu_dirty_rate_stat));
 164
 165    vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
 166    vcpu_dirty_rate_stat->stat.rates =
 167        g_new0(DirtyRateVcpu, max_cpus);
 168
 169    vcpu_dirty_rate_stat->running = false;
 170}
 171
 172void vcpu_dirty_rate_stat_finalize(void)
 173{
 174    free(vcpu_dirty_rate_stat->stat.rates);
 175    vcpu_dirty_rate_stat->stat.rates = NULL;
 176
 177    free(vcpu_dirty_rate_stat);
 178    vcpu_dirty_rate_stat = NULL;
 179}
 180
 181void dirtylimit_state_lock(void)
 182{
 183    qemu_mutex_lock(&dirtylimit_mutex);
 184}
 185
 186void dirtylimit_state_unlock(void)
 187{
 188    qemu_mutex_unlock(&dirtylimit_mutex);
 189}
 190
 191static void
 192__attribute__((__constructor__)) dirtylimit_mutex_init(void)
 193{
 194    qemu_mutex_init(&dirtylimit_mutex);
 195}
 196
 197static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
 198{
 199    return &dirtylimit_state->states[cpu_index];
 200}
 201
 202void dirtylimit_state_initialize(void)
 203{
 204    MachineState *ms = MACHINE(qdev_get_machine());
 205    int max_cpus = ms->smp.max_cpus;
 206    int i;
 207
 208    dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
 209
 210    dirtylimit_state->states =
 211            g_new0(VcpuDirtyLimitState, max_cpus);
 212
 213    for (i = 0; i < max_cpus; i++) {
 214        dirtylimit_state->states[i].cpu_index = i;
 215    }
 216
 217    dirtylimit_state->max_cpus = max_cpus;
 218    trace_dirtylimit_state_initialize(max_cpus);
 219}
 220
 221void dirtylimit_state_finalize(void)
 222{
 223    free(dirtylimit_state->states);
 224    dirtylimit_state->states = NULL;
 225
 226    free(dirtylimit_state);
 227    dirtylimit_state = NULL;
 228
 229    trace_dirtylimit_state_finalize();
 230}
 231
 232bool dirtylimit_in_service(void)
 233{
 234    return !!dirtylimit_state;
 235}
 236
 237bool dirtylimit_vcpu_index_valid(int cpu_index)
 238{
 239    MachineState *ms = MACHINE(qdev_get_machine());
 240
 241    return !(cpu_index < 0 ||
 242             cpu_index >= ms->smp.max_cpus);
 243}
 244
 245static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 246{
 247    static uint64_t max_dirtyrate;
 248    uint64_t dirty_ring_size_MiB;
 249
 250    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());
 251
 252    if (max_dirtyrate < dirtyrate) {
 253        max_dirtyrate = dirtyrate;
 254    }
 255
 256    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
 257}
 258
 259static inline bool dirtylimit_done(uint64_t quota,
 260                                   uint64_t current)
 261{
 262    uint64_t min, max;
 263
 264    min = MIN(quota, current);
 265    max = MAX(quota, current);
 266
 267    return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
 268}
 269
 270static inline bool
 271dirtylimit_need_linear_adjustment(uint64_t quota,
 272                                  uint64_t current)
 273{
 274    uint64_t min, max;
 275
 276    min = MIN(quota, current);
 277    max = MAX(quota, current);
 278
 279    return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
 280}
 281
 282static void dirtylimit_set_throttle(CPUState *cpu,
 283                                    uint64_t quota,
 284                                    uint64_t current)
 285{
 286    int64_t ring_full_time_us = 0;
 287    uint64_t sleep_pct = 0;
 288    uint64_t throttle_us = 0;
 289
 290    if (current == 0) {
 291        cpu->throttle_us_per_full = 0;
 292        return;
 293    }
 294
 295    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
 296
 297    if (dirtylimit_need_linear_adjustment(quota, current)) {
 298        if (quota < current) {
 299            sleep_pct = (current - quota) * 100 / current;
 300            throttle_us =
 301                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
 302            cpu->throttle_us_per_full += throttle_us;
 303        } else {
 304            sleep_pct = (quota - current) * 100 / quota;
 305            throttle_us =
 306                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
 307            cpu->throttle_us_per_full -= throttle_us;
 308        }
 309
 310        trace_dirtylimit_throttle_pct(cpu->cpu_index,
 311                                      sleep_pct,
 312                                      throttle_us);
 313    } else {
 314        if (quota < current) {
 315            cpu->throttle_us_per_full += ring_full_time_us / 10;
 316        } else {
 317            cpu->throttle_us_per_full -= ring_full_time_us / 10;
 318        }
 319    }
 320
 321    /*
 322     * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
 323     *       current dirty page rate may never reach the quota, we should stop
 324     *       increasing sleep time?
 325     */
 326    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
 327        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
 328
 329    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
 330}
 331
 332static void dirtylimit_adjust_throttle(CPUState *cpu)
 333{
 334    uint64_t quota = 0;
 335    uint64_t current = 0;
 336    int cpu_index = cpu->cpu_index;
 337
 338    quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
 339    current = vcpu_dirty_rate_get(cpu_index);
 340
 341    if (!dirtylimit_done(quota, current)) {
 342        dirtylimit_set_throttle(cpu, quota, current);
 343    }
 344
 345    return;
 346}
 347
 348void dirtylimit_process(void)
 349{
 350    CPUState *cpu;
 351
 352    if (!qatomic_read(&dirtylimit_quit)) {
 353        dirtylimit_state_lock();
 354
 355        if (!dirtylimit_in_service()) {
 356            dirtylimit_state_unlock();
 357            return;
 358        }
 359
 360        CPU_FOREACH(cpu) {
 361            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
 362                continue;
 363            }
 364            dirtylimit_adjust_throttle(cpu);
 365        }
 366        dirtylimit_state_unlock();
 367    }
 368}
 369
 370void dirtylimit_change(bool start)
 371{
 372    if (start) {
 373        qatomic_set(&dirtylimit_quit, 0);
 374    } else {
 375        qatomic_set(&dirtylimit_quit, 1);
 376    }
 377}
 378
 379void dirtylimit_set_vcpu(int cpu_index,
 380                         uint64_t quota,
 381                         bool enable)
 382{
 383    trace_dirtylimit_set_vcpu(cpu_index, quota);
 384
 385    if (enable) {
 386        dirtylimit_state->states[cpu_index].quota = quota;
 387        if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
 388            dirtylimit_state->limited_nvcpu++;
 389        }
 390    } else {
 391        dirtylimit_state->states[cpu_index].quota = 0;
 392        if (dirtylimit_state->states[cpu_index].enabled) {
 393            dirtylimit_state->limited_nvcpu--;
 394        }
 395    }
 396
 397    dirtylimit_state->states[cpu_index].enabled = enable;
 398}
 399
 400void dirtylimit_set_all(uint64_t quota,
 401                        bool enable)
 402{
 403    MachineState *ms = MACHINE(qdev_get_machine());
 404    int max_cpus = ms->smp.max_cpus;
 405    int i;
 406
 407    for (i = 0; i < max_cpus; i++) {
 408        dirtylimit_set_vcpu(i, quota, enable);
 409    }
 410}
 411
 412void dirtylimit_vcpu_execute(CPUState *cpu)
 413{
 414    if (dirtylimit_in_service() &&
 415        dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
 416        cpu->throttle_us_per_full) {
 417        trace_dirtylimit_vcpu_execute(cpu->cpu_index,
 418                cpu->throttle_us_per_full);
 419        usleep(cpu->throttle_us_per_full);
 420    }
 421}
 422
 423static void dirtylimit_init(void)
 424{
 425    dirtylimit_state_initialize();
 426    dirtylimit_change(true);
 427    vcpu_dirty_rate_stat_initialize();
 428    vcpu_dirty_rate_stat_start();
 429}
 430
 431static void dirtylimit_cleanup(void)
 432{
 433    vcpu_dirty_rate_stat_stop();
 434    vcpu_dirty_rate_stat_finalize();
 435    dirtylimit_change(false);
 436    dirtylimit_state_finalize();
 437}
 438
 439/*
 440 * dirty page rate limit is not allowed to set if migration
 441 * is running with dirty-limit capability enabled.
 442 */
 443static bool dirtylimit_is_allowed(void)
 444{
 445    MigrationState *ms = migrate_get_current();
 446
 447    if (migration_is_running(ms->state) &&
 448        (!qemu_thread_is_self(&ms->thread)) &&
 449        migrate_dirty_limit() &&
 450        dirtylimit_in_service()) {
 451        return false;
 452    }
 453    return true;
 454}
 455
 456void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
 457                                 int64_t cpu_index,
 458                                 Error **errp)
 459{
 460    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
 461        return;
 462    }
 463
 464    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
 465        error_setg(errp, "incorrect cpu index specified");
 466        return;
 467    }
 468
 469    if (!dirtylimit_is_allowed()) {
 470        error_setg(errp, "can't cancel dirty page rate limit while"
 471                   " migration is running");
 472        return;
 473    }
 474
 475    if (!dirtylimit_in_service()) {
 476        return;
 477    }
 478
 479    dirtylimit_state_lock();
 480
 481    if (has_cpu_index) {
 482        dirtylimit_set_vcpu(cpu_index, 0, false);
 483    } else {
 484        dirtylimit_set_all(0, false);
 485    }
 486
 487    if (!dirtylimit_state->limited_nvcpu) {
 488        dirtylimit_cleanup();
 489    }
 490
 491    dirtylimit_state_unlock();
 492}
 493
 494void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
 495{
 496    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
 497    Error *err = NULL;
 498
 499    qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
 500    if (err) {
 501        hmp_handle_error(mon, err);
 502        return;
 503    }
 504
 505    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
 506                   "dirty limit for virtual CPU]\n");
 507}
 508
 509void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
 510                              int64_t cpu_index,
 511                              uint64_t dirty_rate,
 512                              Error **errp)
 513{
 514    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
 515        error_setg(errp, "dirty page limit feature requires KVM with"
 516                   " accelerator property 'dirty-ring-size' set'");
 517        return;
 518    }
 519
 520    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
 521        error_setg(errp, "incorrect cpu index specified");
 522        return;
 523    }
 524
 525    if (!dirtylimit_is_allowed()) {
 526        error_setg(errp, "can't set dirty page rate limit while"
 527                   " migration is running");
 528        return;
 529    }
 530
 531    if (!dirty_rate) {
 532        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
 533        return;
 534    }
 535
 536    dirtylimit_state_lock();
 537
 538    if (!dirtylimit_in_service()) {
 539        dirtylimit_init();
 540    }
 541
 542    if (has_cpu_index) {
 543        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
 544    } else {
 545        dirtylimit_set_all(dirty_rate, true);
 546    }
 547
 548    dirtylimit_state_unlock();
 549}
 550
 551void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
 552{
 553    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
 554    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
 555    Error *err = NULL;
 556
 557    if (dirty_rate < 0) {
 558        error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
 559        goto out;
 560    }
 561
 562    qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
 563
 564out:
 565    hmp_handle_error(mon, err);
 566}
 567
 568/* Return the max throttle time of each virtual CPU */
 569uint64_t dirtylimit_throttle_time_per_round(void)
 570{
 571    CPUState *cpu;
 572    int64_t max = 0;
 573
 574    CPU_FOREACH(cpu) {
 575        if (cpu->throttle_us_per_full > max) {
 576            max = cpu->throttle_us_per_full;
 577        }
 578    }
 579
 580    return max;
 581}
 582
 583/*
 584 * Estimate average dirty ring full time of each virtaul CPU.
 585 * Return 0 if guest doesn't dirty memory.
 586 */
 587uint64_t dirtylimit_ring_full_time(void)
 588{
 589    CPUState *cpu;
 590    uint64_t curr_rate = 0;
 591    int nvcpus = 0;
 592
 593    CPU_FOREACH(cpu) {
 594        if (cpu->running) {
 595            nvcpus++;
 596            curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
 597        }
 598    }
 599
 600    if (!curr_rate || !nvcpus) {
 601        return 0;
 602    }
 603
 604    return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
 605}
 606
 607static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
 608{
 609    DirtyLimitInfo *info = NULL;
 610
 611    info = g_malloc0(sizeof(*info));
 612    info->cpu_index = cpu_index;
 613    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
 614    info->current_rate = vcpu_dirty_rate_get(cpu_index);
 615
 616    return info;
 617}
 618
 619static struct DirtyLimitInfoList *dirtylimit_query_all(void)
 620{
 621    int i, index;
 622    DirtyLimitInfo *info = NULL;
 623    DirtyLimitInfoList *head = NULL, **tail = &head;
 624
 625    dirtylimit_state_lock();
 626
 627    if (!dirtylimit_in_service()) {
 628        dirtylimit_state_unlock();
 629        return NULL;
 630    }
 631
 632    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
 633        index = dirtylimit_state->states[i].cpu_index;
 634        if (dirtylimit_vcpu_get_state(index)->enabled) {
 635            info = dirtylimit_query_vcpu(index);
 636            QAPI_LIST_APPEND(tail, info);
 637        }
 638    }
 639
 640    dirtylimit_state_unlock();
 641
 642    return head;
 643}
 644
 645struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
 646{
 647    if (!dirtylimit_in_service()) {
 648        return NULL;
 649    }
 650
 651    return dirtylimit_query_all();
 652}
 653
 654void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
 655{
 656    DirtyLimitInfoList *limit, *head, *info = NULL;
 657    Error *err = NULL;
 658
 659    if (!dirtylimit_in_service()) {
 660        monitor_printf(mon, "Dirty page limit not enabled!\n");
 661        return;
 662    }
 663
 664    info = qmp_query_vcpu_dirty_limit(&err);
 665    if (err) {
 666        hmp_handle_error(mon, err);
 667        return;
 668    }
 669
 670    head = info;
 671    for (limit = head; limit != NULL; limit = limit->next) {
 672        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
 673                            " current rate %"PRIi64 " (MB/s)\n",
 674                            limit->value->cpu_index,
 675                            limit->value->limit_rate,
 676                            limit->value->current_rate);
 677    }
 678
 679    g_free(info);
 680}
 681